-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathloc_type_summary.pl
42 lines (41 loc) · 1.2 KB
/
loc_type_summary.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
#!/usr/bin/perl
use warnings;
use strict;
#
#this perl script is used for summary reads biotype from align-bam files
#need file: GTF with biotype messages,reads_align bam file;
#
die "perl $0 <GTF_gene_info> <accept_bam> <RUN_log> > result.txt" if @ARGV ne 3;
my %gene_loc_hash;
open GTF,$ARGV[0] or die "failed to open GTF file";
while(<GTF>){
chomp;
my @array = split(/\t/,$_);
my ($biotype) = $_ =~ /gene_biotype "(.*?)"/;
my $loc = $array[3]."_".$array[4];
#defined loc type
$gene_loc_hash{"chr_".$array[0]}{$loc} = $biotype;
}
#defined type hash
my %type_hash;
open BAM,"samtools view $ARGV[1] |" or die "failed to open BAM file";
open LOG,">./$ARGV[2]" or die "failed to open log file";
while(<BAM>){
chomp;
my @array2 = split(/\t/,$_);
my $mark = "unknown";
foreach my $keys(keys $gene_loc_hash{"chr_".$array2[2]}){
#get start & end loc;
my ($loc_start,$loc_end) = $keys =~ /(.*?)_(.*)/;
if($array2[3] >= $loc_start && $array2[3] <= $loc_end){
$type_hash{$gene_loc_hash{"chr_".$array2[2]}{$keys}}++;
$mark = $gene_loc_hash{"chr_".$array2[2]}{$keys};
last;
}
}
print LOG $array2[0]."\t$mark\n";
}
#result output
foreach my $keys(keys %type_hash){
print $keys."\t".$type_hash{$keys}."\n";
}