Skip to content

Commit

Permalink
ENH: add sample_dict method to MultiFASTADirectoryFormat (#311)
Browse files Browse the repository at this point in the history
* ENH: add sample_dict method to MultiFASTADirectoryFormat

* Add a MultiMAGSequencesDirFmt -> MultiFASTADirectoryFormat transformer

* ENH: add FeatureData[Contig] type

* Lint

* Add missing files

* Revert "ENH: add FeatureData[Contig] type"

This reverts commit edbd24a.
This commit was moved to a separate branch/PR.
  • Loading branch information
misialq authored Mar 11, 2024
1 parent e25f935 commit 9f240ea
Show file tree
Hide file tree
Showing 10 changed files with 355 additions and 3 deletions.
49 changes: 48 additions & 1 deletion q2_types/per_sample_sequences/_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -597,13 +597,60 @@ def _validate_(self, level):

class MultiFASTADirectoryFormat(MultiDirValidationMixin,
                                model.DirectoryFormat):
    """
    Directory format holding FASTA files grouped in one subdirectory
    per sample, i.e. ``<sample_id>/<mag_id>.(fa|fasta)``.
    """
    # Single source of truth for the accepted file names: used both by the
    # file collection below and by sample_dict() when scanning the directory.
    pathspec = r'.+\.(fa|fasta)$'
    sequences = model.FileCollection(pathspec, format=DNAFASTAFormat)

    @sequences.set_path_maker
    def sequences_path_maker(self, sample_id, mag_id):
        # write out with fasta extension, regardless if input was fa or fasta
        return '%s/%s.fasta' % (sample_id, mag_id)

    def sample_dict(self, relative=False):
        """
        Returns a mapping of sample id to another dictionary where keys
        represent the MAG ID and values correspond to the filepath for
        each MAG.

        Parameters
        ----------
        relative : bool
            Whether to return filepaths relative to the directory's location.
            Returns absolute filepaths by default.

        Returns
        -------
        dict
            Mapping of sample id -> dict {mag_id: mag_filepath} as
            described above. Both levels of the dictionary are
            sorted alphabetically by key.
        """
        mags_pattern = re.compile(self.pathspec)
        # Loop-invariant base against which relative paths are computed.
        root = self.path.absolute()

        ids = {}
        for sample_dir in self.path.iterdir():
            # Only subdirectories represent samples; loose files are ignored.
            if not sample_dir.is_dir():
                continue

            mags = {}
            for path in sample_dir.iterdir():
                # Skip anything that is not named like a FASTA file.
                if not mags_pattern.match(path.name):
                    continue

                # The MAG ID is the file name without its final extension.
                mag_id = os.path.splitext(path.name)[0]
                mag_fp = path.absolute()
                if relative:
                    mag_fp = mag_fp.relative_to(root)
                mags[mag_id] = str(mag_fp)

            # The directory name is the sample ID; samples without any
            # matching file are still reported, with an empty mapping.
            ids[sample_dir.name] = dict(sorted(mags.items()))

        return dict(sorted(ids.items()))


class MultiMAGSequencesDirFmt(MultiFASTADirectoryFormat):
manifest = model.File('MANIFEST', format=MultiMAGManifestFormat)
Expand Down
14 changes: 14 additions & 0 deletions q2_types/per_sample_sequences/_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
# ----------------------------------------------------------------------------

import os
import shutil
import functools
import re
import warnings
Expand Down Expand Up @@ -276,3 +277,16 @@ def _29(ff: MultiMAGManifestFormat) -> pd.DataFrame:
lambda f: os.path.join(ff.path.parent, f))
df.set_index(['sample-id', 'mag-id'], inplace=True)
return df


@plugin.register_transformer
def _30(dirfmt: MultiMAGSequencesDirFmt) \
        -> MultiFASTADirectoryFormat:
    # Build a fresh MultiFASTADirectoryFormat and copy every MAG file listed
    # by the source format's sample_dict() into a per-sample subdirectory.
    # NOTE(review): copies are named '<mag_id>.fa', while the target format's
    # path maker appears to use '.fasta' - confirm this difference is intended.
    result = MultiFASTADirectoryFormat()
    for sample_id, mags in dirfmt.sample_dict().items():
        sample_dir = os.path.join(result.path, sample_id)
        os.makedirs(sample_dir)
        for mag_id, src_fp in mags.items():
            shutil.copy(src_fp, os.path.join(sample_dir, f"{mag_id}.fa"))
    return result
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
sample-id,mag-id,filename
sample1,mag1,sample1/mag1.fasta
sample1,mag2,sample1/mag2.fasta
sample1,mag3,sample1/mag3.fasta
sample2,mag1,sample2/mag1.fasta
sample2,mag2,sample2/mag2.fasta
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
>k129_5480
TTATTTTCAAGATAATGAGCCAATTTAAGCGGTGTCTGGCCGCCAAGCTGCACGATCACA
CCTTTAACTTTCCCATGCTCATTTTCTGCTTCAATCAATGACAATACATCTTCGCCTGTG
AGCGGCTCGAAATATAATCTGTCAGAGGTATCATAATCCGTTGAAACGGTTTCAGGATTA
CAATTAACCATGATTGTTTCATAACCCGCCTCTTTTAGTGCATAGGCGGCATGGACACAG
CAATAATCAAATTCAATACCTTGCCCGATACGGTTTGGCCCGCCACCTAGAATAACGATT
TTGTCTTTTTTAGTTGCTGTAATTTCAGAAGTAGAATTAAGTGTTTCATAGGTGCCGTAC
ATATACGATGTTAATGACGGTATTTCTGCCGCACAGCTATCCACCCGCTTATAAACAGGT
TTTACTTTGTGCATCAAACGTGTTTTACGGATCGTTGCTTCTGCAACCCCTACCAATTCA
GCCAGACGCGCATCCGAAAATCCTGCGCGCTTCAATGCCATCCATCCCTGAGGATCTTTC
GGCAGGCCGTTTTTCTTAATGGAGGCCTCAGTATCAATAAGAGATTTTATACGCTCTAAA
TACCACATATCAAATTTTGTTAATTGATAGATAGTTTCTAAATCCATACCGTGTCGCATC
GCTTCGGCTGCATAGAGTAAGCGGGCTGGCGTTGGACGTGAAAGTGCTGCCCGAATATCG
TCCATATCAGGCTCAGACTTACCAGCAATCGGAATGGAGCTAAGCCCCTCTAAGCCCTTT
TCTAAAGAGCGCAAAGCTTTTTGCAGAGACTCTTCGAAGCTACGCCCTATAGCCATGGCT
TCACCGACTGACTTCATTGCTGTGGTTAAGGTGTTATCAGAGCCTTTAAATTTCTCGAAA
GCAAAACGAGGCACTTTTGTCACGACATAATCAATGGATGGCTCAAAGGCTGCGGGTGTT
TTGCCGCCTGTAATATCATTGCCTAATTCATCAAGTGTATACCCTACCGCCAATTTCGCT
GCCACTTTAGCAATCGGAAAACCTGTAGCTTTTGAGGCTAAAGCAGAAGAACGAGACACA
CGAGGGTTCATCTCAATCACCACCATACGGCCTGTCTCTGGATCCATTCCAAATTGGACA
TTCGATCCACCTGTTTCAACACCAATCACACGAAGTACGGCCAATGAGGCATTGCGCATG
ATTTGATACTCTTTATCTGTCAGTGTTAAGGCTGGAGCAACGGTAATAGAATCACCTGTA
TGCACGCCCATAGGGTCAATGTTTTCAATCGAACAAATAATGATAGCGTTGTCCTTTGTA
TCACGAACAACCTCCATCTCGTATTCTTTCCAACCCAATAAACTCTCATCAATCAACACT
TCATTGGTTGGTGACGCATCCAAGCCTTCACGAATGATTTGTTCAAACTCATCTTTGTTA
TAAGCAACCCCGCCACCAGAACCACCCATGGTAAAGGATGGACGAATAATCGCTGGTAAG
CCTGTATGTTTCAGAGCCTCTCTAGCCTCTTCCATAGAATGCACCACCGCACTTTTAGGA
CTTTCAAGACCAATCTTCTCCATACAATCTTTAAATAATTGGCGGTCTTCAGCCTTTTCA
ATGGCTTCTTTATTGGCACCGATCAGTTCAATATTGAGTCTTTTTAATACACCCATTTTA
TCAAGAGCCAGTGCAGCATTCAGTGCCGTCTGACCACCCATGGTTGGAAGCAACGCATCG
GGGCGTTCTTTTTCTAAAATCTTTGCGACAATTTCTGGGGTGATTGGCTCAATATAAGTC
GCATCAGCCATATTCGGATCAGTCATAATTGTGGCTGGATTAGAATTAATCAGGACAACG
CGGTACCCCTCTTCTTTCAGCGCTTTACAGGCTTGTGCACCTGAATAGTCAAATTCACAG
GCTTGACCTATCACGATAGGACCAGCGCCAATAATACAAATGGAGGAAATGTCGGTGCGT
TTAGGCATGTGAATCTCGGTTTCTTTTTTTTATACTTACCGAGAGTTAGTTTATGCACTT
ATCAGGGTGTGCAGACAAGCTCTTTCTTGACCTTACCCGCAAGTTTAGCTATATTCTATC
AACAGCCCGCCCTTGATGGCGGGTTATTTTATTGAAAAGGTGCAAGGCTATGCAAAAAAT
ACCCTTAACAAAACAAGGCCACACAGACCTTGAAGCAGAATTAAAAGATTTAAAACACCG
CCAACGTCCAGCGGTTATTGCTGCGATATCTGAAGCCAGAGAACATGGCGATTTATCAGA
AAACGCTGAATATCACGCCGCCCGTGAGCAGCAAAGCTTTATCGAAGGTCGTATCGAGCA
AGTCGAAGCTATTTTATCGCTCGCTGAGATTATTGACCCGGCCAAAATTTCTGGTGACAC
GGTAAAATTTGCAGCAACTGTTAAAGTCGTTGATTGTGACACAGATGATGAACATATCTA
CCAAATCGTCGGTGATGAAGAATCAGACATTGAAACAGGAAAACTGGCTATCTCGTCACC
TGTTGCCCGCGCTTTAATCGGCAAAAAAGTTGAGGACTCAGTCGAAGTCCGCACACCAAA
AGGCACAAGAGAATACGAAATTTTAGAAATTCTGTATAAGTAATTTCTATTCTTCGATCG
GTACGCCAGGCTTCTTGAAATTACGTTTCATAATAAGTGATGACTTAACAGAGCGAACAT
TTTTTAGCGCTGTCAGTTCTTCTGTAATAAAACGCTGATAAGCATCCCAATCTTTGGCCA
CAATACGGAGTGTGAAATCCATATCACCCGCAATCATGTAACAATCACGAACGAGATCCA
TTTTCTCAACGGCTTTGATAAAGGCCTGAAGGTCTTTTTCTGAAGTGTCTTCTAAAGCTA
CATTGGCAAAAACCGCCACACCATAGCCTAACATTGAAGCACTTAAATCCGCATGATAAC
TTTGGATATAACCATAATCTTCCAAT
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
>k129_5112
CCCCGGAAAGGGCTGGCGACCGACGATGACCTCGGGAAGCCCCAACTCGCGGCCGATGGC
GCGTACCTCGTCTTTGAAGAGGGTGCGAAGGGGCTCGACGAGGTCGAACTGGAGGTCTTC
GGGCAGCCCACCGACGTTGTGGTGGCTCTTGATGTTGGCGGTTCCAGCCCCGCCACCGGA
CTCGACGACATCCGGATACAGGGTGCCCTGCACGAGGAAGCGGATGGGTTCGCCGTCGGC
CTTGGCCTCATCCACGAGCTCGCGCTGCACCCGCTCGAACGCACGGATGAACTCGCGACC
GATGATCTTGCGCTTCTCTTCGGGATCGCTGACGCCGGCGAGGGCCTCGAGGAACGTCTC
GCGGGCGTCGACGGTCACGAGGCGCACACCGGTCGAGGCTACGTAATCCTGCTCGACCTG
TTCGCGTTCGCCCTTGCGCAGCAGGCCGTGGTCGACGAACACGGCAACGAGCTGGTCGCC
GACTGCCTTGTGAACGAGGGCCGTCGAGACAGCCGAGTCGACTCCGCCCGACAGCGCCGA
GAGCACACGACCCGAGCCGACCTGCGCGCGGATCCGATCGACCTGCTCGGCGATGACGTT
GCCGCTGTTCCAGTCTGCGGGGAGGCCCGCAGCCTTGTGCAGGAAGTTCTCGATGATGCG
CTGCCCGTGGTCGGAGTGCTTGACCTCGGGATGCCACTGCACACCGTACATGCGGCGAGC
GTCGTTGCCGAAAGCGGCGACCGGGGTGGCACCGGTGCGGGCGAGCACCTCGAACCCGGC
GGGGGCTTCGGACACCTGGTCACCATGGCTCATCCAGACGTTCTGCTCCGCGGGCTGGCC
ATCGAACAGTACGCTCTCGTCACGGATGATGCTGGCGTCAGTCGCCCCGTACTCGCGCAG
CCCCGTGTTCGCAACGACGCCACCGAGCGCCTGCGCCATGACCTGGAATCCGTAGCAGAT
GCCAAGGGTCGGAACGCCCAGGTCGAACACCGCCGGGTCGAGCGTCGGCGCGCCAGGCTC
GTACACCGATGACGGTCCGCCCGACAGGATGATGCCGATCGGATCTTTTGCGGCAATCTC
TTCAGCTGTCGCGGTGTGCGGAACCAGCTCGCTGTAGACGCCCGCTTCGCGCACGCGACG
GGCAATGAGCTGGGCGTACTGCGCGCCGAAGTCGACGACGAGGACGGGTCGCTGCGAGGT
CTCGGTCTGTTCTGTCACCGGATGCTTTCGGTCGGCGCCCCTGGAACCCAGGAGCGAAGG
TCAGGACACTGTGGGGTTCTGGCGGGTCACGCTGGAGTGTTCGGCGAGATCGTGGTTCTC
GGACTCGCGCGCAGCAAGGTACGTCTTGACCTCACGGGCGACCCGTGCCTCCATGAAGAA
CGACAGGAACGGGACGATTCCGCCCAGCGCGAGGGCGATGAACCGACCGAACCGCCACCG
CATCAGGCTCCAGATGCGGAAGCACGCGAAGAGGTACACGACGTAGAACCAGCCGTGGCC
GACGAGGATCGACAGCGACACATTGACGCCGTCGCCCGCCGACTCGAGGTCGCAGCCCAG
ACCCCCGGGCACGAAGAGCGAGTACCACTCGCATCCGGGCCCGACCAGCACCGGTGCGAA
CCAGAGGAAGCCACCGGACCCGCCGGCGAACAGTTCGACGTGCAGCGGCGAGTACTTGAG
GATCATCTCGGCCAGCAGCAGGAGCAGCATGACACCGGTGATGATCGAGGCGACCTGGTA
GAAGGTCAAGGCTCCGCGAATGGCCGGGAAAGACGACGGTTTCGGCTCACGGGGCATGGG
CCCATTCTAGTCGCCGGTTGCGGTCGCGCTTCCCGACGAGGATGCCGCGGCTGCGGCATC
CTCGAGCTCTTCGACTTCCTTCTCCCACGCATCCTTGGCGAGGCGGTACCAGAAATAGAA
GGCGAAGCCGGCGAAGACCACCCACTCGGCGGCGTAGAAGATGTTCAGCCAGTTGACTGT
GGACCCGGCATCCGGCGCGGGCGAGGCGATGTCCACAAGGCCCGCCGGCGCAGACTGCGA
GGCGATGTAGCTGCGATAGACGTCCAGGCCCGCGGTGTCGTGCCACTGCGACAGGAGCGC
CGCCGGCGACATCCGTGTCATCGTGAACGGCGGCTCGCCGCGCGGCGGCGGCACCGGACC
CTCGTCCGAGATCAACCGACCGACGACCGTCACGGACTCCCCCGCGACCGCAGTCTGCTC
GAGCGCCTCGGCGGCGGATTCGGCGACGGTGAGCGTCGGCGCCCAGCCGACGGCGACGGC
CACGGATGTCGGCGTCGCGGTGTCGGCGATACGCAGCTGACCGGTGACCCAGAAGCCTTC
GACGCCGTCGTTGAAGCGCGACGAAACGACAAGGAAATCCTCGGGAACCCACGTGCCCGT
CACCTCGACGCGCTGGCCCACGAGCGGCTCGGGAAGGTACTCGCCGGGCCCGGCGATCTC
GGCGAGCGGCCTGACCTCTTCGGTGGTCCCGGGCGGGAGTGGGTCGGTGTCGATAGCGCG
CGAGAGCTGCCACTGCCCGAGCCACGCGAACACCCCCGCTACGACGAGCGCGAGCAGCAG
GACGCCGATCCAGCGGGGTCGGAGCATGACCTCCCGCAGGGTCGGGGGAAAGACTGTCTG
GTCTGTCATCCGCCCGTATACGGCGCGACGACCACCTCGACGCGCTGGAACTCCTTGAGA
TCGGAGTACCCGGTCGTGGCCATCGACTTCTTCAGCGCCCCGATCAGGTTCGCGGTTCCG
TCAGCCACCGGAGCCGGACCGTAGAGCACGGATTCGAGGTTCGTCACCTGATCCACCTTC
ACGCGGCGACCGCGCGGGAGCTTCGAGTGGTGAGCCTCCGGCCCCCAGTGGTATCCACGA
CCGGGGGCGTCGGTTGCCCGCGAGAGAGCGACGCCGAGCATGACGGCATCCGCTCCCATC
GCGAGCGCCTTGACGATGTCGCCTGACGTTCCCACACCGCCATCGGCGATGACGTGGACG
TAGCGCCCGCCCGACTCGTCGAGGTAGTCGCGGCGCGCGCCGGCGACGTCGGCTACCGCC
GTGGCCATCGGGGCGTGGATGCCGAGAACCCCGCGCGTCGTCGAGGCTGCGCCCCCGCCG
AAGCCGACGAGCACGCCCGCGGCGCCCGTGCGCATGAGGTGCAGGGCTGCCGTGTAGGTC
GCAGCACCGCCGACGATGACAGGCACGTCGAGGTCGTAGATGAACTTCTTGAGGTTGAGG
GGCTCGTCGACGCTCGAGACGTGCTCGGCCGAAACCGTCGTGCCACGGATGACGAACAGG
TCCACACCCGCGGCGACCACGGTTTCGTACAGCTGCTGGGTGCGCTGCGGAGTCAAAGCA
CCGGCCACCGTGACTCCGGCGTCACGGATCTGCTGCAGTCGCTCACGGATGAGCTCGGGC
TTGATCGGCTCGGAGTAGAGCTCCTGCATCCGGCAGGTTGCCGTCGCCTCGTCGAGAGAC
GCGATCTCAGCCAGCAGCGGCTCGGGGTCGTCGTACCGGGTCCAGAGCCCCTCGAGATCG
AGGACACCGAGTCCGCCGAGCTGACCGAGCATGATCGCCGTCTGCGGGCTCACAACCGAA
TCCATCGGGGCGCCGAGCACCGGGATGTCGAACTGGAACGCGTCGATCGACCATGCGGTC
GAGACATCCTCGGGATTGCGGGTGCGGCGCGAGGGAACGACGGCGATGTCGTCGAACGAG
TACGCGCGGCGAGCCCGCTTGGCGCGGCCGATCTCGATC
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
>k129_6525
AAACTCTATCAAGCGTATACCAAAGTGAGTGGTGTATTGATCAGTCAGCTCATTATTGAA
TCGGACTTCTGTCTCCAATCGATATATTGATGGAGAAGAAGGGCTCCATAAAAGAGGATT
ATTAAGTTCTAAATTAACCTCTGCTGTCTCTTTATTTTCCGCTTCTACATTAAGCGAATG
ATCCATCTGCAGTAATGGTTTATCGCCTTCATTAGAAAATATTTTAAATACTACAGTAAC
TGCGCTCCAATGATTATACGTATTAAAGACCTCTGCGCGTAACAATAATTCTACTTTATT
TAGGTCTTGCTGATCATGGAAATGGCAGCGCACTTGAGTTCCGCAGTAGGGCACATAGAC
GCAATTAGTAGCTATTAGTCGAACATCGCGGTAGATGCCGCCGCCCTCATACGACCATAG
TTCAAATTCTCTGGCATCGCAGCGAACCGCGACCACGTTCGGCACATTCGCATCGCAAAG
CTCAGTGATATCGAGAGTGAAACTGGTGTAGCCTGATAGATGCCGCCCAGCTAAATGGCC
ATTTACCCATATTGTTGCATCTCGGTAAATGCCATCAAATTCAAGGTGAATACGCTGCTT
GCTAGCTTCTTTGGGAATTTCAAACGTTTTGCGATACCAGCCTACATCAGTCGGCAGTGA
GCCATGCACAGCATTCGCGGATGCCCGAAATTCGCCTTCAATTACGAAATCATGAGGTAG
GTTTATGTCGCGCCATGCTTCATCTGGATAGCCCAAGCGCGCGACCCCATGGTTTCCTGC
CTTTAACCACTCGGCTCGCTTAAAACGATTAGCATGAATGGCTTGATGGTTAGTGCTGTC
TAGCTCACCTCGATGAAACTTCCAACCTTGGTTAAATTCATAAGTGGTACGCATAGAATT
ACTGATGTCTTTTAAAAGATTCTACAAGTGGAGTCTATTAATTATTTGATAAGTTACTCT
GATTATTTTTAGAGATTTCTAATACAACTCCGCTGCACGTGCCGTAACGTCCGCCTTGGT
ATGCGCAAAACAGATGGGTGGGGACGCCTTCAGAGTTAATCAGTAACTGCGGTCGTTCGA
ATCGCCCTTCACGATCCAGTCCAGGCAATGTCTCGTCGAAGTAAGTTCCAGCATCTTTGT
AAGCAACCTGCGGATTTTGCCAAATAAGGCCATCGCTGGATTCCATATACAGCCCATACT
CGTGGTTATAAAAACCCATGTCACGCATGATTATTTTATATGGTGCGCAATCCTTCGGTT
CGTACCATGTGTACGCGTC
>k129_6531
TCGGATTTGCCGAATGCTTTTTGTAAGGGCCTTCAATTGATTTGGCGATAGCGAGCCCGT
ATTTACGGTTGCCGTTAAATCGCCGCCCAGTGTCCCAATCCCAACCTTTATAGTAGAGCC
AATATTCGCCATTAGGATTTTGTAATAGCGATGGGTTGCTAACGACGGCATCGTCCCAAT
CGCCATCGCTGCCAACATTAATAACAGGTTCATCTCCAACGCGTCGCCAAGGTCCATTTA
TATTATCGGCAATGGCTAAACCGATGCGTTTGGTGTAGACTAATTGATTGAAGTATTTTT
CGTACTCTGCAGTAGATAAATTGGGTAGCTCATTTTGCTCGATATCTAGTTTCGAGCCAT
CTGCTCCCATGTAGAAAAGAGCATACTTGTCGCCGACCTTTTGCACAGTCGGATTGTGGA
TTGCCCATGAGTCCCAAGCATTTGCACCGCTGCCTTTTAGAACGACTCCTAAATCTTCGT
AGGGGCCTTCCGGAAGATCAGCAACCGCATGGGCCACTTCGCAGGCACTTACCCAACCAG
AAAATGTATACTCGTTTCGCCAACGTGAGTAAAAAACATGAATGCGCCCGTCGGGTCCGT
AGATAGGCGAACAGCACCAAACATGATAGCCTTCTACTTCAAGAATTCGCCCCAGTGGTT
TGAGTTTTTGCTCGAAGTTCGAAGTGCTTACTTCAGAGGTGATGGGACGTAGCTTCTGTA
AATTAATGAGCGACTTATTGCTAACTGTAGAGTCCATGAAAAAAAGGTAAACTTTATACG
AGTAATGTTATGCTCCTTAAAACTGTCAAGGTTTAGGCATTTTGCTGAGCATTATGGTGT
TTAATGGGCTTGAATCATAACAGGATTAAGCGACATTTAAATATTAATGATAAGAATTAG
TGATATAGCTAAAGAGTTAGGGCTTTCGAGGGTTACAGTCTCGGCTATTTTAAATGGACG
ACACCAGAAAATAGGTATTTCCGAAAAGACCGCGCAAAGGGTTCGTTCGAGTGCAAAGGC
TATGGGTTATCTACCCAATCAGAATGCATTGAGTATGAAGAGAGGTCGAAGCATGACTAT
TGGTATGCTGAGTAGTGCGCTATCGGAGGAGTGGGGTGCTAAAATTCTTGTTGGTGCATT
AAGTGCGATAAAGAACACGCCTTATTCACTGCGCGTTGAGTCAGTACAGGGAGCAGCAGA
AGAGCGCGGTGCCCTAGAGCGCCTCTTGGGGTCACGAATTGAAGGGTTGTTGTGCTGCAA
TATAAAT
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
>k129_4684
TGATACCGACGCGGCACTTGAGTGCGCGCTATCCTTCAAGGAAGCCACATGCGTTATTGT
TAAACATGCAAACCCCTGTGGTGTTGCAACTGGGTCATCATTACTTGAGGCATATGAGGG
AGCTTACGCTACAGACCCAACATCTGCTTTTGGTGGCATTATTGCTTTTAATCGAGAATT
AGATTCAAAAACAGCAAAGGCAGTTATTGATAGACAGTTTGTTGAGGTAATCATTGCCCC
ATCTATATCACCCGACAGCATTAAAATTATTGCAAAAAAAGATGGTATACGTTTATTAGA
AGCTGGTTCACGACAAGAAGACATCAAAACTCTTAACATGAAGCGAGTCAGCGGAGGTTT
ATTACTGCAAGACAATGATATTGGGATTATTGATCGTGGTGATATAAAAATTGTTTCAAA
CGAGGTAATTG
>k129_5618
GTGCTAATCGCACCCTCATGAGCGACACCATTATTCTTTATTTTTGAGTCTTCAGCAAAA
ATAAATACAGTCAAGTCACAACTCTTAGCGTATTCAAATGCGCGTCTTAATACTTCTGTA
TTTTCAATAGCAACATCACCATTACTTACGCCGATGCAGCCTGCAGCTTTTAATAAAAAC
ATCTCTGTCAGTTCTTTTCCATTTAGTTCTTGTGTTAAAGCGCCAAGTGGAAAAATATTT
GCGCGGTTAGATTCACTAGCACGCCGATTTATAAATTCCACAATGGCCGGCGTATCAATA
GTTGGTTGTATATCAGGTGGGACACAAATTGATGTTATACCACTACGGTTTGCAGCTTTA
AGTTCATTTTTGATAGCTATGTTTTTTTCTGAACCAATTTCGCCAAGCCTACCGCAAATA
TCAACTAATCCAGGTAAAATTATTTTATCTTTTGCGTTTATATCCAGATCCGATTTAAAA
>k129_5631
TCATGATGATCCAAAAGCAGTTGCGGAAGCATCTGGGATAATTACGCGGAGTGGATGTCG
CCGAATCGCAAGATTTACTTTTGATTATGCTATTAAAACAGGAAGAAAAAAAATTACAAT
AGTTCATAAAGCAAATATCCTAAAAGCTCTAACAGGTCTGTTTCTAGAAACAGCAATGGA
AATCGGCAAAGAGTATGGAAATAAACTGGAAATTGAAGAGCGAATTGTCGACAACACAGC
AATGCAATTAGTAATCGATCCAGCGCAATTTAATATAATACTAACAACAAATATGTTCGG
TGATATTCTCTCAGATGAGATTGCGGGTCTAATAGGCGGACTCGGGTTGGCGCCAGGGGG
GAATATTGGTGATGATATAGCAATTTTTGAAGCGGTACACGGAACGGCTCCTGATATTGC
TGGAAAAGGGATTGCGAATCCAACAGCACTTTACCTAGCTTCAGCAATGATGTTGGAACA
TATAAATCAAAATAATATGGCCAATAACCTAAGGAAAGCAATTAGAGAAACATTGAAGAA
TAAAAAAAATCGCACAATCGATCTAGGTGGCGAAGCATCCACAAAAGATTATATGTCATA
TGTTATCGATAATTTAAACTAGAAAACAAATGAATGCACTTATACTCTTAGCACATGGAA
GTCGAAGAAGTGAATCTAACCTCGAAGTAGAGAGTTTATCAAATGAAATTTATGCGCTGA
TTAGCAACAAATT
>k129_2817
GTCGCCAATTAGCAACTATGATGTCTTCTGGAGTACCTTTGGTCCAATCATTTGAAATCA
CAGGTCGTGGCCATGACAACCCAGGAATGCAGAGCCTAATTTTAGCCATCAAGGCTGATG
TTGAATCTGGAAATAGTTTGGTTGATGCCCTTAGAAAACATCCATTACATTTTAACTCGC
TTTATTGCAATTTAATTGAAGCTGGTGAACACGCCGGTATTTTAGAGGCAATTTTACACA
AATTAGCAACATACTTAGAAAAGACAGAAGCTCTGAAATCAAAAATAAAATCGGCTTTAT
TTTATCCAATGGCAGTTATTGTCGCAGCAATTATTGTGGTAACAATTCTGATGATATTTG
TAATACCTCAATTTTCTGAATTATTTGGAAGTTTTGGTGCTGACTTACCGGGTTTGACAC
AATTTTTAATAGATGCATCAGATTTCTTTGTTAGCCACTGGTGGAAATTATTTGGGTTAT
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
>k129_5401
CCATTGTATGTCTTTAGGTAGCTCCTCATGTTTGAGGTTCATGTCTTGGATTTTGTTTTC
TCCAAAAATCCTTTGTCCTGCATCATAAGCTTGCATTACTTCCTCATTGGATTTAGTTTT
AGAAACAGCCACCAGTTGGACTGATGGGGGTATCTCCTTGATTATCCTAGAAATATTTTC
TGCTATATTCATAATACAAACTTACAATTTTCACAGAGTATTTTTTAAAGAATGAATTGA
AATTGAAGTTGAATTAAAGCATTTAAAATTTACAACATTCCATGATTTGATGTGCAAGTT
TCAAAGCACGGGTTCCGCTTTCCAGAGGAACAATAATTGGATCATTATTTTCGATGGCAT
CAGCAAAAACCTCTAACTCTTCCAGTATGGCGTTGGAATCTTCAATTCGTGGATTTTCAA
AATAGATTTGTTTTTTTTCTCCCTGTGCATTTTCAAGAATCATGGCAAAGTCTTTCGGTT
TTTTTGGAGCTTTTTTCATTTTTACTACCTCGACTTTTTTCTCTAGAAAATCTACTGCGA
TATAAGTGTTTTTCTGGAAAAAGCGCGTTTTACGCATTTTTTTTAGTGAAATCCTGCTTG
CGGTAAGGTTGGCAACACAGCCGTTTTCAAACTCAATTCTTGCGTTAGTTATATCGGGGG
TGGAGCTTATTACCGCCACCCCAGAAGCGGATACGGATTTTACTTTGGCATCGACAACAC
TCAACAATACATCAATATCGTGAATCATCAAATCTAAAACCACTGGAACATCGGTTCCTC
TTGGATTGAATTCTGCCAAACGATGGGACTCTATAAATTTTGGGTGGGTAATCGATGATT
TTACCGCCTTAAAAGCCGGATTAAAACGTTCTACGTGTCCCACCTGACCAAGAACCATTT
TTTTTGTAGCCATTTCGGTAATTTCAAGGGCTTCGGTAACATTATTGGCAATAGGCTTTT
CTATAAATATATGCTTTCCTTTTTGGATTGCATTTATGGCATTTTTATGGTGGAAAAATG
TAGGAGTGACAATATCTACCATATCGCAAGCTTGGATCAAATCGGCTTCACTTTTAAAAG
CGGTATAGCCATTTTCTTTAGCTAAGGCCTTGGTGTTTTTTTTATCCTGATCATAAAACC
CCACTAATTGGTATCGTTTAGAGGCCTCTAGTAAACGCAGATGAATTTTACCCAAATGTC
CTGCACCCAAGACGCCGACTTTTATCATAGCACTTAATTTTTAATCAAAAATACCATCTT
TTTCTGATTTTTTTTTGGAAGTAATTACATTTGTCCTCATGATTGATTCAACCAAGCATC
AAGGACAAAGAAGACAGTTGGTAAAATTGTTAGAGGAGAAAGGAGTCTACGACAAAAGGG
TTTTGAATGCTGTTGGAAGTGTTCCCCGTCATCTGTTTATGGATTCGGGTTTGGAGGAGT
ATTCCTATATTGACAAAGCCTATCCCATTGCGGCTAATCAGACCATATCACAGCCTTACA
CCGTAGCTTTTCAGACCCAATTGCTGGAACTTCAGAAAGGGGATCGAGTTTTGGAAATCG
GAACGGGTTCGGGCTATCAAACAGCTATTTTAATCGCCCTTGAAGGTCTAAAAGTGTATA
CCATTGAACGCCAACTGGAGTTATATAAAACAACTGTTTTGTTATTTAAAAAGTTGGGGT
TAAATCCCAAAAAAGTGATATTTGGTGATGGTTACCAAGGTTTACCAGATCAAGCACCTT
TTGATGCCATCATCGTTACTGCAGGTGCGCCTCAAGTACCCAAACCTTTGTTGGAACAAT
TGACCATTGGAGGGAGACTCGTAATCCCTGTGGGAGAGAAAGACCAAGTCATGACCCGAT
ATATGCGAACAGGGGAAAAGACCTTTGATCGACAAACCTTTGGGAATTTCAGATTTGTCC
CTTTGCTAAAGGATGAGAGATAGAGCTTGTTAAGTACTTCGTGAATATCGGATTTTCCTT
ACTGAATTTATAGCTCTTGACAATATCAATTGTTTGAAGATGGAAGGTGAAGTATACTTC
AGGCTTCGTAGCTGATAAGAATATTCACTCTTCGATTTTATAAATTTTGTTAAAAAATTG
CTCTACGTCGGTAGTTTTTTGGATTGATTAGAGCAGGTTTACTGTGTGTTGTAAAATTTT
TATAATCTTTAATTTGAGGTTGTTCACTATAATTTGGTGAGAAAAACTATTTATTGAAAT
TTTTTTTAATCCTATCTAAATCCCTTTTATTGTCTAAATCTTTAAGGGCTTCTCTTTTAT
CGTAAAGTTTTTTCCCCCGAGCTAAAGCGATCAACATTTTGGCAAAACCTTTTTCGTTGA
TGAAAAGCTTAAGTGGTACAATGGTCAAGCCGGAATTTTTCACCTGTTTGAAAAGTTTGT
TTAATTCTCTTTTTTTAA
Loading

0 comments on commit 9f240ea

Please sign in to comment.