-
Notifications
You must be signed in to change notification settings - Fork 7
/
split-dbSNP-by-chr.pl
73 lines (59 loc) · 2.25 KB
/
split-dbSNP-by-chr.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
#!/usr/bin/perl -w
#############################################################
#
# Script that takes dbSNP file generated from dbSNP/*.pl
# and split by chromosomes
# This is because the dbSNP output file is large
# (+1GB for humans, could be larger for other organisms
#
# The output file will be used in map-dbSNP-to-records.pl
# Author: Sim Ngak Leng
# First created: 2011-07-25
# Last modified: 2011-07-26
#
#############################################################
use strict;
#require 'common-utils.pl';
use File::Basename;
use Cwd qw(abs_path);
my $directory_of_script = dirname(abs_path(__FILE__));
require $directory_of_script . '/common-utils.pl';
if (scalar @ARGV != 1) {
die "Usage: perl $0 <metafile>\n" .
"Example: perl $0 human.txt\n";
}
my $TRUE = 0;
my $FALSE = 1;
my $DEBUG = $TRUE;
my ($meta_file) = @ARGV;
my $meta_href = readMeta($meta_file);
#my %meta_hash = %{$meta_href};
### MAIN ###################################################
&split_dbSNP_output_by_chromosomes($meta_href);
### SUB ROUTINE
sub split_dbSNP_output_by_chromosomes() {
print "Splitting dbSNP output file into different chromosomes.\n";
my $meta_href = $_[0];
my %meta_hash = %{$meta_href};
#my $chrlist = $meta{"CHR_LIST"};
#my @chromosomes = split(",", $chrlist);
my $chr_fasta_dir = $meta_hash{"PARENT_DIR"} . "/". $meta_hash{"CHR_DOWNLOAD_DEST"};
my @chromosomes = getChr ($meta_hash{"PARENT_DIR"} . "/". $meta_hash{"GENE_DOWNLOAD_DEST"});
my $dbSNPFile = $meta_hash{"PARENT_DIR"} . "/". $meta_hash{"DBSNP_DIR"} . "/" . $meta_hash{"DBSNP_VCF_FILE"};
my $cat_command = "cat";
if ($dbSNPFile =~ /\.gz$/) {
$cat_command = "zcat";
}
print "DBSNP main output file is : $dbSNPFile\n";
my $outputDir = $meta_hash{"PARENT_DIR"} . "/". $meta_hash{"DBSNP_DIR"};
foreach my $chr (@chromosomes) {
# print "Awk-ing chromosome chr$chr\n";
#if ($chr eq "M") { $chr = "MT"; } # This is to cater to dbSNP calling Mitochrondria chromosome MT
my $outfile = $outputDir . "/vcf_chr_" . $chr . ".vcf";
my $chrField = '$1';
my $command = "$cat_command $dbSNPFile | awk '{ if ($chrField == \"$chr\") { print }}' | gzip > $outfile.gz";
print "$command\n";
system($command);
} #end foreach
} #end split_dbSNP_output_by_chromosomes
__END__