-
Notifications
You must be signed in to change notification settings - Fork 7
/
make-sift-scores-db-batch.pl
79 lines (60 loc) · 2.8 KB
/
make-sift-scores-db-batch.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
#!/usr/bin/perl -w
########################################################
#
# Batch script to
# 1) map-scores-back-to-records.pl
# 2) match-replace-dbSNP-vcf.pl
# 3) add-gene-id-output-tsv.pl
#
# Author: Pauline Ng
# First created: 2012-12-12
# Last modified: 2011-12-12
#
########################################################
use strict;
#require 'common-utils.pl';
use File::Basename;
use Cwd qw(abs_path);
my $directory_of_script = dirname(abs_path(__FILE__));
require $directory_of_script . '/common-utils.pl';
if (scalar @ARGV != 1) {
die "Usage: perl $0 <metafile>\n" .
"Example: perl $0 mouse.txt\n";
}
my ($metafile) = @ARGV;
my $meta_href = readMeta($metafile);
my %meta_hash = %{$meta_href};
my @chromosomes = getChr ( $meta_hash{"PARENT_DIR"} . "/" . $meta_hash{"GENE_DOWNLOAD_DEST"});
my $final_outfolder = $meta_hash{"PARENT_DIR"} . "/" . $meta_hash{"ORG_VERSION"} ;
make_dir ($final_outfolder);
foreach my $chr (@chromosomes) {
#my $command = "perl make-single-records-BIOPERL.pl $metafile " . "chr" . $chr;
my $chr_of_interest = "chr" . $chr;
my $command1 = "perl map-scores-back-to-records.pl $metafile $chr";
print "Executing $command1\n";
`$command1`;
my $command2 = "perl match-replace-dbSNP-vcf.pl $metafile $chr";
print "Executing $command2\n";
`$command2`;
my $mapped_score_file = $meta_hash{"PARENT_DIR"} . "/" . $meta_hash{"SINGLE_REC_WITH_SIFTSCORE_DIR"} . "/" . $chr . "_scores.Srecords"; # this is output of command1
my $db_file = $meta_hash{"PARENT_DIR"} . "/" . $meta_hash{"SINGLE_REC_WITH_SIFTSCORE_DIR"} . "/" . $chr . "_scores.Srecords.with_dbSNPid";
my $noncod_file = $meta_hash{"PARENT_DIR"} . "/" . $meta_hash{"SINGLE_REC_BY_CHR_DIR"} . "/". $chr . ".singleRecords_noncoding.with_dbSNPid";
print "db file $db_file\nnoncoding $noncod_file";
# checking file
my $check_outfile = $final_outfolder . "/" . $chr . "_SIFTDB_stats.txt";
# now sort the files
print "final outfolder is $final_outfolder\n";
my $db_sorted_gz = $final_outfolder . "/" . $chr . ".gz";
my $db_sorted = $db_file . ".sorted";
my $line = "\"#Position\tRef_allele\tNew_allele\tTranscript_id\tGene_id\tGene_name\tRegion\tRef_amino_acid\tNew_amino_acid\tPosition_of_amino_acid_substitution\tSIFT_score\tSIFT_median_sequence_info\tNum_seqs_at_position\tdbSNP_id\"";
`echo $line > $db_sorted`;
`cat $noncod_file $db_file | LC_ALL=C sort -k2,2n -k4,4 -k12,12n | cut -f2-15 >> $db_sorted`;
my $regions_file = $final_outfolder . "/" . $chr . ".regions";
my $command4 = "python make_regions_file.py $db_sorted $regions_file";
print "$command4\n";
`$command4`;
`gzip -c $db_sorted > $db_sorted_gz`;
print "Built single records " . $meta_hash{"PARENT_DIR"} . "/" . $meta_hash{"SINGLE_REC_BY_CHR_DIR"} . "\n";
system ("rm $mapped_score_file");
system ("rm $db_file");
}