-
Notifications
You must be signed in to change notification settings - Fork 0
/
get_length_GC.pl
91 lines (79 loc) · 1.48 KB
/
get_length_GC.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
#!/usr/bin/perl
use strict;
use warnings;
# Script to get sequence length and GC %
# 2015-11-09
# Both arguments are required: the input file and the mode selector.
# Mode 0 parses '>'-prefixed headers, i.e. FASTA input, so the usage text
# says "fasta". The message goes to STDERR and the exit status is non-zero
# so that a calling shell or pipeline can detect the misuse.
if (@ARGV < 2) {
    print STDERR "Usage: perl $0 <input file> <0: fasta; >0: column in file, index from 1>\n";
    exit 1;
}
# Main processing: the second command-line argument selects the input mode.
#   0  -> FASTA mode: print "id<TAB>length<TAB>GC%" for every record.
#   >0 -> column mode: take whitespace-separated column N (1-based) of each
#         line as the sequence and print the line followed by
#         "<TAB>length<TAB>GC%".
# GC% is printed with sprintf "%f". A zero-length sequence is reported with
# GC% 0.000000 instead of aborting with "Illegal division by zero".

# Count the G and C bases of a sequence, ignoring case.
sub _count_gc {
    my ($sequence) = @_;

    # tr/// with an empty replacement list only counts the listed characters.
    return ($sequence =~ tr/GCgc//);
}

# Format a GC percentage; a zero length yields 0 rather than a division by zero.
sub _format_gc {
    my ($gc_count, $length) = @_;
    my $percent = $length > 0 ? $gc_count * 100 / $length : 0;
    return sprintf("%f", $percent);
}

# Open the input for reading with three-argument open so that the file name
# is never interpreted as a mode or a command. "-" still means STDIN, as it
# did with the previous two-argument open.
sub _open_input {
    my ($path) = @_;
    return \*STDIN if $path eq '-';
    open my $fh, '<', $path or die "can not open $path: $!\n";
    return $fh;
}

my ($input_file, $mode) = @ARGV;
my $in = _open_input($input_file);

if ($mode == 0) {
    # FASTA mode. $id stays undefined until the first header line is seen.
    # Sequence lines that appear before the first header are counted toward
    # the first record (or reported with an empty id when the file contains
    # no header at all).
    my $id;
    my $length   = 0;
    my $gc_count = 0;

    while (my $line = <$in>) {
        # Strip the line ending, including the "\r" of CRLF files, so it is
        # not counted as part of the sequence.
        $line =~ s/\r?\n\z//;

        if ($line =~ /^>/) {
            if (defined $id) {
                # A new header closes the previous record.
                print $id, "\t", $length, "\t", _format_gc($gc_count, $length), "\n";
                $length   = 0;
                $gc_count = 0;
            }

            # The id is the first whitespace-delimited token without the '>'.
            ($id) = split ' ', $line;
            $id =~ s/^>//;
        }
        else {
            $length   += length $line;
            $gc_count += _count_gc($line);
        }
    }

    # Flush the last record; print nothing for input without any content.
    if (defined $id or $length > 0) {
        $id = "" unless defined $id;
        print $id, "\t", $length, "\t", _format_gc($gc_count, $length), "\n";
    }
}
else {
    # Column mode: $mode is the 1-based index of the sequence column.
    my $column = $mode;

    while (my $line = <$in>) {
        $line =~ s/\r?\n\z//;

        my @fields   = split ' ', $line;
        my $sequence = $fields[$column - 1];

        # Lines with fewer columns (including blank lines) count as an
        # empty sequence.
        $sequence = "" unless defined $sequence;

        my $length = length $sequence;
        print $line, "\t", $length, "\t",
            _format_gc(_count_gc($sequence), $length), "\n";
    }
}

close $in;