This repository has been archived by the owner on Jul 3, 2020. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathitreecountpe.pl
executable file
·90 lines (66 loc) · 2.37 KB
/
itreecountpe.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
#!/usr/bin/env perl
use strict;
use warnings;
use Parallel::ForkManager;
use Getopt::Long;
use File::Temp qw/ tempfile tempdir /;
use Cwd 'abs_path';
use FindBin;
use lib "$FindBin::Bin";
use TreeCount;
my $gtffile;
my $stranded = 0;
my $ncpu = 10;
my $verbose;
my $output;
GetOptions("gtf:s"=>\$gtffile, "stranded"=>\$stranded, "ncpu|t:i"=>\$ncpu, "verbose|v"=>\$verbose, "output|o:s"=>\$output) or die "Usage";
my $bamfile = abs_path(shift);
(my $iforest, my $genelist) = read_gtf_as_intervalforest($gtffile);
my ($bam, $header) = open_bam($bamfile);
my $target_count = $header->n_targets;
my $target_names = $header->target_name;
my $chrlengths = $header->target_len;
#prepare the fork manager
my $pm = Parallel::ForkManager->new($ncpu);
#the location for the temp files
my $tmpdir = tempdir( CLEANUP => 1 );
my %tmpfiles;
#process the chromosomes in large to small order should give best runtime
my @o = sort { $chrlengths->[$b] <=> $chrlengths->[$a] } 0 .. ($target_count - 1);
foreach my $tid (@o) {
my $chr = $target_names->[$tid];
print STDERR "Forking child for chromosome $chr\n";
#open the temp file for this chromosome
my ($fh, $filename) = tempfile( UNLINK => 0, DIR => $tmpdir );
if($verbose) {
print STDERR "Child $$ writing temporary data to $filename\n";
}
$tmpfiles{$target_names->[$tid]} = $filename;
$pm->start and next; # do the fork
#we must reopen the bam in the forked child
my ($fbam, $fheader, $findex) = open_bam($bamfile);
#initialize all counts at zero
my $count = { map {$_ => 0} keys %{$genelist->{$chr}} };
my $delayed = {};
#load the requested chromsome from the bam file and call the callback for every read
$findex->fetch($fbam,$fheader->parse_region($chr), \&TreeCount::count_read_callback, [$iforest, $count, $delayed, $target_names, $stranded]);
print STDERR "Chromosome $chr done. ", scalar(keys(%$delayed)), " mates never matched\n" if $verbose;
print $fh join("\t", $_, $count->{$_}),"\n" foreach sort keys %$count;
close($fh);
$pm->finish; # do the exit in the child process
}
$pm->wait_all_children;
#write report
print STDERR "Collecting data parts\n";
if(defined $output) {
open(OUT,">", $output) or die "$!\n";
select OUT;
}
#collect all data
for my $ch (@$target_names) {
#get the formatted path
open(I,"<", $tmpfiles{$ch}) or warn "Temp file $tmpfiles{$ch} for chromosome $ch not found?\n";
print while <I>;
close(I);
}
exit;