-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathhla_exons_sep_fastq.pl
executable file
·52 lines (47 loc) · 1.53 KB
/
hla_exons_sep_fastq.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
#!/usr/bin/env perl
use strict;
use warnings;
use Data::Dumper;
use Bio::SeqIO;
use Bio::SeqIO::embl;
use List::Util qw(reduce);
my $filename = $ARGV[0];
my $stream = Bio::SeqIO->new(-file => $filename, -format => 'EMBL');
my $num_seq = 0;
my %hla = ();
my %utr = ();
my %partial_count;
my $psuedo_gene = 0;
while ( (my $seq = $stream->next_seq()) ) {
# do something with $seq
my @cds = $seq->get_SeqFeatures('CDS');
if (scalar @cds == 1) {
my $cds_tag = $cds[0];
my @gene = $cds_tag->get_tag_values('gene');
my @allele = $cds_tag->get_tag_values('allele');
if (scalar @gene == 1 && ($gene[0] =~ /HLA-[A-C]/ || $gene[0] =~ /HLA-D[QPR].*/ )) {
my $gene_name = $gene[0];
my $allele_name = $allele[0];
my @features = $seq->get_SeqFeatures();
my @allele_split = split /\*/, $allele_name;
my @allele_breakdown = split /:/, $allele_split[1];
foreach my $feature (@features) {
if ($feature->primary_tag eq 'exon') {
my @number = $feature->get_tag_values('number');
$hla{$allele_name}{$number[0]} = $feature->seq()->seq();
}
}
}
} else {
++$psuedo_gene;
}
++$num_seq;
}
print STDERR "There are $num_seq sequences in this file\n";
foreach my $key (sort keys %hla) {
foreach my $exon (sort keys $hla{$key}) {
print ">${key} exon$exon\n";
print $hla{$key}{$exon}."\n";
}
}
print STDERR "Psuedo genes $psuedo_gene\n";