Skip to content

Commit

Permalink
more changes
Browse files Browse the repository at this point in the history
  • Loading branch information
nathandunn committed Jun 11, 2020
1 parent 2fcb383 commit cf971b4
Show file tree
Hide file tree
Showing 4 changed files with 176 additions and 77 deletions.
18 changes: 13 additions & 5 deletions apollo/annotations/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1261,17 +1261,23 @@ def _get_subfeature_type(self, rec):
def _process_gff_entry(self, rec, new_feature_list, new_transcript_list, source=None,
disable_cds_recalculation=False, use_name=False, verbose=False):
type = self._get_type(rec)
print("type " + str(type))
subfeatures = self._get_subfeatures(rec)
if type in util.gene_types:
print("is gene type")
if subfeatures is not None and len(subfeatures) > 0:
feature_data = util._yieldApolloData(rec.features[1:], use_name=use_name,
print("has sub features")
feature_data = util._yieldApolloData(subfeatures, use_name=use_name,
disable_cds_recalculation=disable_cds_recalculation)
print("output feature data" + str(feature_data))
new_transcript_list.append(feature_data)
else:
print("NO sub features, just adding directly")
feature_data = util._yieldApolloData(rec.features, use_name=use_name,
disable_cds_recalculation=disable_cds_recalculation)
print("output feature data" + str(feature_data))
new_feature_list.append(feature_data)
if type in util.pseudogenes_types:
elif type in util.pseudogenes_types:
if subfeatures is not None and len(subfeatures) > 0:
feature_data = util._yieldApolloData(rec.features[1:], use_name=use_name,
disable_cds_recalculation=disable_cds_recalculation)
Expand All @@ -1280,11 +1286,11 @@ def _process_gff_entry(self, rec, new_feature_list, new_transcript_list, source=
feature_data = util._yieldApolloData(rec.features, use_name=use_name,
disable_cds_recalculation=disable_cds_recalculation)
new_feature_list.append(feature_data)
if type in util.coding_transcript_types or type in util.noncoding_transcript_types:
elif type in util.coding_transcript_types or type in util.noncoding_transcript_types:
feature_data = util._yieldApolloData(rec.features, use_name=use_name,
disable_cds_recalculation=disable_cds_recalculation)
new_transcript_list.append(feature_data)
if type in util.single_level_feature_types:
elif type in util.single_level_feature_types:
feature_data = util._yieldApolloData(rec.features, use_name=use_name,
disable_cds_recalculation=disable_cds_recalculation)
new_feature_list.append(feature_data)
Expand Down Expand Up @@ -1349,7 +1355,7 @@ def _process_gff_entry(self, rec, new_feature_list, new_transcript_list, source=
# # a gene or a transcript
#
return_object = {}
return_object['features'] = feature_data
return_object['features'] = [feature_data]
return return_object

def load_gff3(self, organism, gff3, source=None, batch_size=1,
Expand Down Expand Up @@ -1498,6 +1504,8 @@ def load_gff3(self, organism, gff3, source=None, batch_size=1,
# sys.stdout.flush()

sys.stdout.flush()
print("features to write" + new_features_list)
print("transcripts to write" + new_transcripts_list)
self._write_features(new_features_list, test, verbose, timing, FeatureType.FEATURE)
self._write_features(new_transcripts_list, test, verbose, timing, FeatureType.TRANSCRIPT)
sys.stdout.write("\nfinished loading\n")
Expand Down
16 changes: 10 additions & 6 deletions test-data/gene-top.gff
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
##gff-version 3
##sequence-region Merlin 1 172788
Merlin GeneMark.hmm gene 2 691 -856.563659 + . ID=Merlin_1;seqid=Merlin
Merlin GeneMark.hmm mRNA 2 691 . + . ID=Merlin_1_mRNA;Parent=Merlin_1;seqid=Merlin;color=#00ff00
Merlin GeneMark.hmm exon 2 691 . + . ID=Merlin_1_exon;Parent=Merlin_1_mRNA;seqid=Merlin
Merlin GeneMark.hmm CDS 2 691 . + 0 ID=Merlin_1_CDS;Parent=Merlin_1_exon;seqid=Merlin
##gff-version 3
##sequence-region Merlin 1 172788
ctg123 example gene 1050 9000 . + . ID=EDEN;Name=EDEN;Note=protein kinase
ctg123 example mRNA 1050 9000 . + . ID=EDEN.1;Parent=EDEN;Name=EDEN.1;Index=1
ctg123 example five_prime_UTR 1050 1200 . + . Parent=EDEN.1
ctg123 example CDS 1201 1500 . + 0 Parent=EDEN.1
ctg123 example CDS 3000 3902 . + 0 Parent=EDEN.1
ctg123 example CDS 5000 5500 . + 0 Parent=EDEN.1
ctg123 example CDS 7000 7608 . + 0 Parent=EDEN.1
ctg123 example three_prime_UTR 7609 9000 . + . Parent=EDEN.1
59 changes: 57 additions & 2 deletions test/annotations_test.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,23 @@
from BCBio import GFF
from BCBio.GFF import GFFExaminer
# from gffutils import inspect


from . import ApolloTestCase, wa
from apollo import util

def parse(path):
    """Yield every record that ``GFF.parse`` produces for the file at ``path``.

    The file is opened when iteration starts and is closed as soon as the
    generator is exhausted or closed, instead of being left open.
    """
    with open(path) as in_handle:
        for rec in GFF.parse(in_handle):
            yield rec


class AnnotationsTest(ApolloTestCase):

def test_features_to_apollo_schema(self):
def test_inclusion(self):
    """Check that ``"gene"`` is a member of ``util.gene_types``."""
    gene_is_listed = "gene" in util.gene_types
    assert gene_is_listed

def test_features_to_apollo_schema_mrna(self):
path = 'test-data/mrna-top.gff'
with open(path) as file:
print(file.read())
Expand All @@ -18,7 +29,51 @@ def test_features_to_apollo_schema(self):
feature_data = util._yieldApolloData(rec.features)

in_handle.close()
# print(str(len(feature_data)))
print(str(feature_data))
assert (feature_data['location'] is not None)
assert (len(feature_data['children']) == 2)

def test_features_to_apollo_schema_gene(self):
    """Feed the gene-rooted GFF fixture through ``_process_gff_entry``.

    Along the way the test prints the parsed records, the raw file, the
    ``GFFExaminer`` parent/child map and the accumulated feature and
    transcript lists, then asserts on the value returned for the last record.
    Every file handle is managed by a ``with`` block so it is closed even
    when parsing or processing raises.
    """
    path = 'test-data/gene-top.gff'
    print("inspecting")
    output = parse(path)
    print(str(output))
    for o in output:
        print("AAA")
        print(str(o))
        print("BBB")
    print("inspected")

    # Dump the raw fixture; the with-block closes the handle on exit.
    with open(path) as file:
        print(file.read())

    examiner = GFFExaminer()
    with open(path) as in_handle:
        print(examiner.parent_child_map(in_handle))

    feature_data = None
    new_feature_list = []
    new_transcript_list = []
    with open(path) as in_handle:
        for rec in GFF.parse(in_handle):
            print(str(rec))
            for f in rec.features:
                print("feature ===== start")
                print(f)
                print("feature ===== end")
            feature_data = wa.annotations._process_gff_entry(rec, new_feature_list=new_feature_list,
                                                             new_transcript_list=new_transcript_list)
            print("feature list " + str(new_feature_list))
            print("transcript list " + str(new_transcript_list))
            print("feature data" + str(feature_data))
            # assert (subfeatures is not None and len(subfeatures) > 0)
            # # feature_data = util.features_to_apollo_schema(rec.features, feature_list, transcript_list)
            # feature_data = util._yieldApolloData(rec.features)

    print(str(feature_data))
    print("final feature list " + str(new_feature_list))
    print("final transcript list " + str(new_transcript_list))
    # NOTE(review): these assertions index the return value of
    # _process_gff_entry directly; confirm that it exposes 'location' and
    # 'children' at the top level rather than under a wrapper key.
    assert (feature_data['location'] is not None)
    assert (len(feature_data['children']) == 2)

Expand Down
160 changes: 96 additions & 64 deletions test/io_test.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import re

from . import ApolloTestCase, wa
import time


class IoTest(ApolloTestCase):
Expand All @@ -24,67 +25,98 @@ def test_export_gff3(self):
assert 'Merlin\t.\tnon_canonical_three_prime_splice_site\t4297\t4297\t.\t-\t.' in gff_content
assert 'Merlin\t.\tnon_canonical_five_prime_splice_site\t4364\t4364\t.\t-\t.' in gff_content

def test_export_vcf(self):
    """Export ``test_organism`` as VCF and check the header of the download.

    Raises:
        Exception: if the response from ``write_downloadable`` contains an
            ``error`` key or lacks a ``uuid`` key.
    """
    org = wa.organisms.show_organism('test_organism')

    uuid_vcf = wa.io.write_downloadable(org['commonName'], 'VCF')
    if 'error' in uuid_vcf or 'uuid' not in uuid_vcf:
        raise Exception("Apollo failed to prepare the VCF file for download: %s" % uuid_vcf)

    vcf_content = wa.io.download(uuid_vcf['uuid'], output_format="text")
    assert '##fileformat=VCFv4.2' in vcf_content
    # The header date was previously pinned to 20200608, which can only match
    # on that one day (presumably it is the export date -- confirm). Check
    # the YYYYMMDD shape instead of a fixed value.
    assert re.search(r'##fileDate=\d{8}', vcf_content)
    assert '##source=.' in vcf_content
    assert '#CHROM POS ID REF ALT QUAL FILTER INFO' in vcf_content

def test_export_fa_cds(self):
    """Export the CDS FASTA for ``test_organism`` and check known fragments.

    Raises:
        Exception: if the response from ``write_downloadable`` contains an
            ``error`` key or lacks a ``uuid`` key.
    """
    organism = wa.organisms.show_organism('test_organism')

    prepared = wa.io.write_downloadable(organism['commonName'], 'FASTA', seq_type='cds')
    export_ready = 'error' not in prepared and 'uuid' in prepared
    if not export_ready:
        raise Exception("Apollo failed to prepare the cds FASTA file for download: %s" % prepared)

    fasta_text = wa.io.download(prepared['uuid'], output_format="text")

    # Sequence fragments and header suffixes that must appear in the export.
    expected_fragments = (
        'CGTTTAGACAAAGGTACATTATTGTATCGTGGCCAAAAATTAGACCTTCCTACATTCGAG',
        'CACCTCAATTATCACTGCCGGTACTCAACAGCTGGTAAGAAAGTCTGGTGTATCGAAATA',
        'ATGAGCATTAAAGTCAGAGAATTAGATGATAAGACTGATGCTTTAATTAGCGGAGTTAAA',
        'ATGAAAAGCGAAAACATGTCCACAATGAGACGTCGTAAAGTTATCGCTGATTCAAAGGGT',
        '(mRNA) 690 residues [Merlin:2-691 + strand] [cds]',
        '(mRNA) 108 residues [Merlin:1067-2011 - strand] [cds]',
        '(mRNA) 1662 residues [Merlin:3066-4796 - strand] [cds]',
    )
    for fragment in expected_fragments:
        assert fragment in fasta_text

def test_export_fa_cdna(self):
    """Export the cDNA FASTA for ``test_organism`` and check known fragments.

    Raises:
        Exception: if the response from ``write_downloadable`` contains an
            ``error`` key or lacks a ``uuid`` key.
    """
    organism = wa.organisms.show_organism('test_organism')

    prepared = wa.io.write_downloadable(organism['commonName'], 'FASTA', seq_type='cdna')
    export_ready = 'error' not in prepared and 'uuid' in prepared
    if not export_ready:
        raise Exception("Apollo failed to prepare the cdna FASTA file for download: %s" % prepared)

    fasta_text = wa.io.download(prepared['uuid'], output_format="text")

    # Sequence fragments and header suffixes that must appear in the export.
    expected_fragments = (
        'CGTTTAGACAAAGGTACATTATTGTATCGTGGCCAAAAATTAGACCTTCCTACATTCGAG',
        'ATGAAATCAATTTTTCGTATCAACGGTGTAGAAATTGTAGTTGAAGATGTAGTTCCTATG',
        'ATGCTAACTTTAGATGAATTTAAAAACCAAGCGGGTAATATAGACTTTCAGCGTACTAAT',
        'ATGAGCATTAAAGTCAGAGAATTAGATGATAAGACTGATGCTTTAATTAGCGGAGTTAAA',
        '(mRNA) 690 residues [Merlin:2-691 + strand] [cdna]',
        '(mRNA) 945 residues [Merlin:1067-2011 - strand] [cdna]',
        '(mRNA) 1662 residues [Merlin:3066-4796 - strand] [cdna]',
    )
    for fragment in expected_fragments:
        assert fragment in fasta_text

def test_export_fa_peptide(self):
    """Export the peptide FASTA for ``test_organism`` and check known fragments.

    Raises:
        Exception: if the response from ``write_downloadable`` contains an
            ``error`` key or lacks a ``uuid`` key.
    """
    organism = wa.organisms.show_organism('test_organism')

    prepared = wa.io.write_downloadable(organism['commonName'], 'FASTA', seq_type='peptide')
    export_ready = 'error' not in prepared and 'uuid' in prepared
    if not export_ready:
        raise Exception("Apollo failed to prepare the peptide FASTA file for download: %s" % prepared)

    fasta_text = wa.io.download(prepared['uuid'], output_format="text")

    # Sequence fragments and header suffixes that must appear in the export.
    expected_fragments = (
        'RLDKGTLLYRGQKLDLPTFEHNAENKLFYFRNYVSTSLKPLIFGEFGRMFMALDDDTTIY',
        'HLNYHCRYSTAGKKVWCIEISYWSNEQSCCSVFIR',
        'MSIKVRELDDKTDALISGVKTSAGQSSQSAKIKSTITAQYPSERSAGNDTSGSLRVHDLY',
        'MKSENMSTMRRRKVIADSKGERDAASTASDQVDSLELIGLKLDDVQSANELVAEVIEEKG',
        '(mRNA) 229 residues [Merlin:2-691 + strand] [peptide]',
        '(mRNA) 35 residues [Merlin:1067-2011 - strand] [peptide]',
        '(mRNA) 553 residues [Merlin:3066-4796 - strand] [peptide]',
    )
    for fragment in expected_fragments:
        assert fragment in fasta_text
# def test_export_vcf(self):
#
# org = wa.organisms.show_organism('test_organism')
#
# uuid_vcf = wa.io.write_downloadable(org['commonName'], 'VCF')
# if 'error' in uuid_vcf or 'uuid' not in uuid_vcf:
# raise Exception("Apollo failed to prepare the VCF file for download: %s" % uuid_vcf)
#
# vcf_content = wa.io.download(uuid_vcf['uuid'], output_format="text")
# assert '##fileformat=VCFv4.2' in vcf_content
# assert '##fileDate=20200608' in vcf_content
# assert '##source=.' in vcf_content
# assert '#CHROM POS ID REF ALT QUAL FILTER INFO' in vcf_content
#
# def test_export_fa_cds(self):
#
# org = wa.organisms.show_organism('test_organism')
#
# uuid_fa = wa.io.write_downloadable(org['commonName'], 'FASTA', seq_type='cds')
# if 'error' in uuid_fa or 'uuid' not in uuid_fa:
# raise Exception("Apollo failed to prepare the cds FASTA file for download: %s" % uuid_fa)
#
# fa_content = wa.io.download(uuid_fa['uuid'], output_format="text")
# assert 'CGTTTAGACAAAGGTACATTATTGTATCGTGGCCAAAAATTAGACCTTCCTACATTCGAG' in fa_content
# assert 'CACCTCAATTATCACTGCCGGTACTCAACAGCTGGTAAGAAAGTCTGGTGTATCGAAATA' in fa_content
# assert 'ATGAGCATTAAAGTCAGAGAATTAGATGATAAGACTGATGCTTTAATTAGCGGAGTTAAA' in fa_content
# assert 'ATGAAAAGCGAAAACATGTCCACAATGAGACGTCGTAAAGTTATCGCTGATTCAAAGGGT' in fa_content
# assert '(mRNA) 690 residues [Merlin:2-691 + strand] [cds]' in fa_content
# assert '(mRNA) 108 residues [Merlin:1067-2011 - strand] [cds]' in fa_content
# assert '(mRNA) 1662 residues [Merlin:3066-4796 - strand] [cds]' in fa_content
#
# def test_export_fa_cdna(self):
#
# org = wa.organisms.show_organism('test_organism')
#
# uuid_fa = wa.io.write_downloadable(org['commonName'], 'FASTA', seq_type='cdna')
# if 'error' in uuid_fa or 'uuid' not in uuid_fa:
# raise Exception("Apollo failed to prepare the cdna FASTA file for download: %s" % uuid_fa)
#
# fa_content = wa.io.download(uuid_fa['uuid'], output_format="text")
# assert 'CGTTTAGACAAAGGTACATTATTGTATCGTGGCCAAAAATTAGACCTTCCTACATTCGAG' in fa_content
# assert 'ATGAAATCAATTTTTCGTATCAACGGTGTAGAAATTGTAGTTGAAGATGTAGTTCCTATG' in fa_content
# assert 'ATGCTAACTTTAGATGAATTTAAAAACCAAGCGGGTAATATAGACTTTCAGCGTACTAAT' in fa_content
# assert 'ATGAGCATTAAAGTCAGAGAATTAGATGATAAGACTGATGCTTTAATTAGCGGAGTTAAA' in fa_content
# assert '(mRNA) 690 residues [Merlin:2-691 + strand] [cdna]' in fa_content
# assert '(mRNA) 945 residues [Merlin:1067-2011 - strand] [cdna]' in fa_content
# assert '(mRNA) 1662 residues [Merlin:3066-4796 - strand] [cdna]' in fa_content
#
# def test_export_fa_peptide(self):
#
# org = wa.organisms.show_organism('test_organism')
#
# uuid_fa = wa.io.write_downloadable(org['commonName'], 'FASTA', seq_type='peptide')
# if 'error' in uuid_fa or 'uuid' not in uuid_fa:
# raise Exception("Apollo failed to prepare the peptide FASTA file for download: %s" % uuid_fa)
#
# fa_content = wa.io.download(uuid_fa['uuid'], output_format="text")
# assert 'RLDKGTLLYRGQKLDLPTFEHNAENKLFYFRNYVSTSLKPLIFGEFGRMFMALDDDTTIY' in fa_content
# assert 'HLNYHCRYSTAGKKVWCIEISYWSNEQSCCSVFIR' in fa_content
# assert 'MSIKVRELDDKTDALISGVKTSAGQSSQSAKIKSTITAQYPSERSAGNDTSGSLRVHDLY' in fa_content
# assert 'MKSENMSTMRRRKVIADSKGERDAASTASDQVDSLELIGLKLDDVQSANELVAEVIEEKG' in fa_content
# assert '(mRNA) 229 residues [Merlin:2-691 + strand] [peptide]' in fa_content
# assert '(mRNA) 35 residues [Merlin:1067-2011 - strand] [peptide]' in fa_content
# assert '(mRNA) 553 residues [Merlin:3066-4796 - strand] [peptide]' in fa_content

def setUp(self):
    """Recreate the ``temp_org`` organism from the directory of ``alt_org``."""
    # Clear out any temp_org left behind by an earlier run.
    leftover = wa.organisms.show_organism('temp_org')
    if 'directory' in leftover:
        wa.organisms.delete_organism(leftover['id'])
        self.waitOrgDeleted('temp_org')

    template = wa.organisms.show_organism('alt_org')
    if 'directory' not in template:
        # Not expected, but tolerate one failed lookup: pause and ask again.
        # A failing lookup has been seen to return:
        # {'error': 'No row with the given identifier exists: [org.bbop.apollo.Organism#1154]'}
        time.sleep(1)
        template = wa.organisms.show_organism('alt_org')

    source_directory = template['directory']
    wa.organisms.add_organism('temp_org', source_directory)
    self.waitOrgCreated('temp_org')

def tearDown(self):
# Clean up after a test: delete the 'temp_org' and 'some_new_org' organisms
# when show_organism returns an entry carrying an 'id', and wait for the
# deletion via waitOrgDeleted.
# NOTE(review): indentation is lost in this view, so whether each
# waitOrgDeleted call is nested under the preceding `if` cannot be
# confirmed here -- check against the real file before restructuring.
org_info = wa.organisms.show_organism('temp_org')

# Only attempt the delete when the lookup returned something with an 'id'.
if org_info and 'id' in org_info:
wa.organisms.delete_organism(org_info['id'])

self.waitOrgDeleted('temp_org')

# Same clean-up for the second organism name, 'some_new_org'.
org_info = wa.organisms.show_organism('some_new_org')

if org_info and 'id' in org_info:
wa.organisms.delete_organism(org_info['id'])
self.waitOrgDeleted('some_new_org')

0 comments on commit cf971b4

Please sign in to comment.