-
Notifications
You must be signed in to change notification settings - Fork 38
/
get_pseudo.pl
40 lines (34 loc) · 1.1 KB
/
get_pseudo.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
#!/usr/bin/env perl
# Scan the protein FASTA file(s) named on the command line (the usage text
# suggests a Prokka .faa) and report every pair of consecutive records whose
# header descriptions are identical. Each hit is printed to STDOUT as
#   <id_a> & <id_b> => <description>
# and the record/hit counts are written to STDERR.
use strict;
use warnings;
# use Data::Dumper;

@ARGV or die "Usage: $0 <prokka.faa>";

# Descriptions that are never reported, even when adjacent records share them.
my %ignore = ('hypothetical protein'=>1);

# Collect [ id, description ] for every header line, in input order.
# Lines that do not look like ">id description" (sequence lines, headers
# without a description) are skipped.
my @gene;
while (<ARGV>) {
    next unless m/^>(\S+)\s+(.+)$/;
    push @gene, [ $1, $2 ];
}
my $N = scalar(@gene);
print STDERR "Found $N genes.\n";
# print Dumper(\@gene);

# Compare each record with its predecessor. The index stops at the last
# valid element ($#gene): running it up to $N would read one slot past the
# end, autovivify an empty entry in @gene and compare against undef.
# The range is empty when fewer than two records were read, so no separate
# size guard is needed.
my $P = 0;
for my $i (1 .. $#gene) {
    my $prod = $gene[$i-1][1];
    if ( !$ignore{$prod} and $gene[$i][1] eq $prod ) {
        print "$gene[$i-1][0] & $gene[$i][0] => $prod\n";
        $P++;
    }
}
print STDERR "Found $P potential psuedo-genes\n";
#>MCDIFLJF_00002 Branched-chain-amino-acid aminotransferase
#MAESIKKLSYFEGKILPESEAKISIQTHALQYGTTVFGGLRGYYDKDTDNIYLFRILDHY
#QRLINSTRIMQLKLDKTKEELRDITIDLIRQCGYKENIYLRPFVYTSALQLSPRFHDVPT
#ELAIYILQLNDYLDTKHGLKTMVSSWRRFDDAVIPTLSKVSGGYVNSALAKSEAVQNGFD
#EAIFLDSRGFVSEGSAENIFLVRDGKIITPGINSSLLEGITRRSVLQIAKDNGIEVIERD
#ISRSELYISDEVFFSGTGVQIAWVSEIDHRKIGNSKMGPITKKIQSLFFNLVINKEEKYR
#HWLTPVY
#>MCDIFLJF_00003 DNA polymerase III subunit tau
#MFRSAFQFDDILGQEVALTFLKRYTSKPETIPPLLIFHGPDGTGKESTSERFIKNVLCFE
#GTSCGTCASCKAFMRNSHPDYICFPEDRGKIIAIGSEDNPEEFTIRWLIRSRLNYRPHLS