Skip to content

Commit

Permalink
avoid self hit with buffer length
Browse files Browse the repository at this point in the history
  • Loading branch information
chienchi authored May 21, 2020
1 parent 7913013 commit 877950e
Showing 1 changed file with 9 additions and 0 deletions.
9 changes: 9 additions & 0 deletions src/get_repeat_coords.pl
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

my $identity=95;
my $len_cutoff=0;
my $buffer=5;
my $output="repeats_coords.txt";
my $stats= "repeats_stats.txt";

Expand All @@ -16,6 +17,7 @@
'i=i' => \$identity,
'l=i' => \$len_cutoff,
'o=s' => \$output,
'b=i' => \$buffer,
's=s' => \$stats,
'help|?' => sub{Usage()},
);
Expand All @@ -26,6 +28,7 @@ sub Usage
perl $0 [options] <fasta>
-i INT the identity cutoff 0 to 100 (default: 95)
-l INT the repeat length cutoff (default:0)
-b INT the buffer base length to skip self-hits (default:5)
-o STRING output filename (default: repeats_coords.txt)
-s STRING output stats filename (default: repeats_stats.txt)
Expand Down Expand Up @@ -77,6 +80,12 @@ sub get_coords_file
my $seq_id=$fields[7];
my $start=$fields[0];
my $end=$fields[1];
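## Skip self-hits: rows where S1 differs from S2 and E1 differs from E2 by less than $buffer bases. Example of a skipped row: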
#[S1] [E1] [S2] [E2] [LEN 1] [LEN 2] [% IDY] [TAGS]
#19871 29943 19871 29944 10073 10074 99.99 EPI_ISL_417419 EPI_ISL_417419
if ($start < ($fields[2]+$buffer) && $start > ($fields[2]-$buffer) && $end < ($fields[3]+$buffer) && $end > ($fields[3]-$buffer)){
next;
}
for my $pos ($start..$end){
$hash{$seq_id}->{$pos}=1;
}
Expand Down

0 comments on commit 877950e

Please sign in to comment.