-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathcorrect_ref_with_reseq.pl
executable file
·53 lines (50 loc) · 1.33 KB
/
correct_ref_with_reseq.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
#!/usr/bin/perl -w
use strict;
use Data::Dumper;
## run 'cat file.vcf | vcf-to-tab > out.tab' before running this script
my $vcf_tab = shift;
my $ref_id = shift;
if (!defined $ref_id){
$ref_id = 'NB';
}
open VCFTAB, "$vcf_tab" or die;
## ref is NB
##CHROM POS REF EG4_2 HEG4_2 NB
#Chr1 1117 A A/A A/C A/A
chomp (my $header = <VCFTAB>);
my @header = split /\t/ , $header;
shift @header;
shift @header;
shift @header;
my @strains = @header;
my $strain_count = @strains;
$header =~ s/\t$ref_id//;
print $header , "\n";
while (my $line = <VCFTAB>){
chomp $line;
my ($chr , $pos , $ref_nt , @snp) = split /\t/ , $line;
#print "$chr , $pos , $ref_nt , @snp\n";
my $reseq_index;
for (my $i=0 ; $i < $strain_count ; $i++){
if ( $header[$i] eq "$ref_id"){
$reseq_index = $i ;
}
}
my ($a1 , $a2) = $snp[$reseq_index] =~ /(.)\/(.)/;
#change ref if diff from reseq ref
if ( ($a1 eq $a2) and ($a1 ne $ref_nt) and $a1 ne '.'){
$ref_nt = $a1;
}
#what should i do if reseq ref-reseq is het at a position
#throw out any lines that have reseq ref as heter
elsif ($a1 ne $a2){
#move to next position, dont print this one
next;
}
print "$chr\t$pos\t$ref_nt";
for (my $i=0 ; $i < $strain_count ; $i++){
next if $i == $reseq_index;
print "\t$snp[$i]";
}
print "\n";
}