Skip to content

Commit

Permalink
Merging changes
Browse files Browse the repository at this point in the history
  • Loading branch information
David Jones committed Aug 8, 2014
2 parents cc6d1cd + 2f9c0ad commit 9c4bd9a
Show file tree
Hide file tree
Showing 6 changed files with 104 additions and 50 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
/caveman.setup
/blib
/pm_to_blib
/Makefile
Expand Down
5 changes: 4 additions & 1 deletion MANIFEST
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
bin/caveman.pl
bin/caveman_merge_results.pl
CaVEMan-1.2.5.tar.gz
CaVEMan-1.2.6.tar.gz
caveman.setup
docs.tar.gz
docs/pod_html/_blkbluw.css
docs/pod_html/_blkcynw.css
Expand Down Expand Up @@ -38,5 +40,6 @@ setup.log
setup.sh
t/1_pm_compile.t
t/2_pl_compile.t
t/pcapCaveman.t
t/cavemanImplement.t
testData/1_2.fq
testData/1_2_bad.fq
76 changes: 53 additions & 23 deletions bin/caveman.pl
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,12 @@ BEGIN
const my $RAW_MUTS => q{%s.muts.vcf};
const my $IDS_MUTS => q{%s.muts.ids.vcf};
const my $FLAGGED_MUTS => q{%s.flagged.muts.vcf};
const my $FLAGGED_MUTS_GZ => q{%s.flagged.muts.vcf.gz};
const my $FLAGGED_MUTS_TBI => q{%s.flagged.muts.vcf.gz.tbi};
const my $RAW_SNPS => q{%s.snps.vcf};
const my $IDS_SNPS => q{%s.muts.ids.vcf};
const my $IDS_SNPS => q{%s.snps.ids.vcf};
const my $IDS_SNPS_GZ => q{%s.snps.ids.vcf.gz};
const my $IDS_SNPS_TBI => q{%s.snps.ids.vcf.gz.tbi};
const my $NO_ANALYSIS => q{%s.no_analysis.bed};

const my @VALID_PROTOCOLS => qw(WGS WXS RNA);
Expand Down Expand Up @@ -121,20 +125,22 @@ BEGIN
Sanger::CGP::Caveman::Implement::caveman_merge_results($options);
}

# these values are used in multiple blocks
$options->{'raw_muts_file'} = sprintf($RAW_MUTS,$options->{'out_file'});
$options->{'ids_muts_file'} = sprintf($IDS_MUTS,$options->{'out_file'});
$options->{'raw_snps_file'} = sprintf($RAW_SNPS,$options->{'out_file'});
$options->{'ids_snps_file'} = sprintf($IDS_SNPS,$options->{'out_file'});

#Add ids to the VCF files
if(!exists $options->{'process'} || $options->{'process'} eq 'add_ids'){
$options->{'raw_muts_file'} = sprintf($RAW_MUTS,$options->{'out_file'});
$options->{'ids_muts_file'} = sprintf($IDS_MUTS,$options->{'out_file'});
$options->{'raw_snps_file'} = sprintf($RAW_SNPS,$options->{'out_file'});
$options->{'ids_snps_file'} = sprintf($IDS_MUTS,$options->{'out_file'});
#Muts
$options->{'raw_file'} = $options->{'raw_muts_file'};
$options->{'ids_file'} = $options->{'ids_muts_file'};
Sanger::CGP::Caveman::Implement::add_vcf_ids($options);
Sanger::CGP::Caveman::Implement::caveman_add_vcf_ids($options, 'muts');
#Snps
$options->{'raw_file'} = $options->{'raw_snps_file'};
$options->{'ids_file'} = $options->{'ids_snps_file'};
Sanger::CGP::Caveman::Implement::add_vcf_ids($options);
Sanger::CGP::Caveman::Implement::caveman_add_vcf_ids($options, 'snps');
}

#Flag the results.
Expand All @@ -161,16 +167,22 @@ sub cleanup{
|| die "Error trying to move cov_array '$options->{cave_carr}' -> '".File::Spec->catfile($options->{'outdir'},$CAVEMAN_COV_ARR)."': $!";
move ($options->{'splitList'},File::Spec->catfile($options->{'outdir'},'splitList'))
|| die "Error trying to move splitList '$options->{splitList}' -> '".File::Spec->catfile($options->{'outdir'},'splitList')."': $!";
move (sprintf($IDS_MUTS,$options->{'out_file'}),sprintf($IDS_MUTS,$final_loc))
|| die "Error trying to move raw muts file '".sprintf($IDS_MUTS,$options->{'out_file'})."' -> '".sprintf($IDS_MUTS,$final_loc)."': $!";
move (sprintf($IDS_SNPS,$options->{'out_file'}),sprintf($IDS_SNPS,$final_loc))
|| die "Error trying to move raw SNPs file '".sprintf($IDS_SNPS,$options->{'out_file'})."' -> '".sprintf($IDS_SNPS,$final_loc)."': $!";
move (sprintf($NO_ANALYSIS,$options->{'out_file'}),sprintf($NO_ANALYSIS,$final_loc))
|| die "Error trying to move no analysis file '".sprintf($NO_ANALYSIS,$options->{'out_file'})."' -> '".sprintf($NO_ANALYSIS,$final_loc)."': $!";
move (sprintf($FLAGGED_MUTS,$options->{'out_file'}),sprintf($FLAGGED_MUTS,$final_loc))
|| die "Error trying to move flagged muts file '".sprintf($FLAGGED_MUTS,$options->{'out_file'})."' -> '".sprintf($FLAGGED_MUTS,$final_loc)."': $!";

move (sprintf($IDS_SNPS_GZ,$options->{'out_file'}),sprintf($IDS_SNPS_GZ,$final_loc))
|| die "Error trying to move raw SNPs file '".sprintf($IDS_SNPS_GZ,$options->{'out_file'})."' -> '".sprintf($IDS_SNPS_GZ,$final_loc)."': $!";
move (sprintf($IDS_SNPS_TBI,$options->{'out_file'}),sprintf($IDS_SNPS_TBI,$final_loc))
|| die "Error trying to move raw SNPs file '".sprintf($IDS_SNPS_TBI,$options->{'out_file'})."' -> '".sprintf($IDS_SNPS_TBI,$final_loc)."': $!";

move (sprintf($FLAGGED_MUTS_GZ,$options->{'out_file'}),sprintf($FLAGGED_MUTS_GZ,$final_loc))
|| die "Error trying to move flagged muts file '".sprintf($FLAGGED_MUTS_GZ,$options->{'out_file'})."' -> '".sprintf($FLAGGED_MUTS_GZ,$final_loc)."': $!";
move (sprintf($FLAGGED_MUTS_TBI,$options->{'out_file'}),sprintf($FLAGGED_MUTS_TBI,$final_loc))
|| die "Error trying to move flagged muts file '".sprintf($FLAGGED_MUTS_TBI,$options->{'out_file'})."' -> '".sprintf($FLAGGED_MUTS_TBI,$final_loc)."': $!";

move ($options->{'logs'},File::Spec->catdir($options->{'outdir'},'logs'))
|| die "Error trying to move logs directory '$options->{logs}' -> '".File::Spec->catdir($options->{'outdir'},'logs')."': $!";

remove_tree ($options->{'tmp'});
return 0;
}
Expand Down Expand Up @@ -201,6 +213,9 @@ sub setup {
'u|unmatched-vcf=s' => \$opts{'unmatchedvcf'},
'np|normal-protocol=s' => \$opts{'normprot'},
'tp|tumour-protocol=s' => \$opts{'tumprot'},
'c|flagConfig=s' => \$opts{'flagConfig'},
'f|flagToVcfConfig=s' => \$opts{'flagToVcfConfig'},
'st|seqType=s' => \$opts{'seqType'},
) or pod2usage(2);

pod2usage(-message => PCAP::license, -verbose => 2) if(defined $opts{'h'});
Expand All @@ -211,7 +226,9 @@ sub setup {
for(keys %opts) { $defined++ if(defined $opts{$_}); }
pod2usage(-msg => "\nERROR: Options must be defined.\n", -verbose => 2, -output => \*STDERR) unless($defined);

pod2usage(-msg => "\nERROR: Options must be defined.\n", -verbose => 2, -output => \*STDERR) unless(defined($opts{'species'}) && defined($opts{'species-assembly'}));
pod2usage(-msg => "\nERROR: 'species' must be defined.\n", -verbose => 2, -output => \*STDERR) unless(defined $opts{'species'});
pod2usage(-msg => "\nERROR: 'species-assembly' must be defined.\n", -verbose => 2, -output => \*STDERR) unless(defined $opts{'species-assembly'});
pod2usage(-msg => "\nERROR: 'seqType' must be defined.\n", -verbose => 2, -output => \*STDERR) unless(defined $opts{'seqType'});

#check the reference is the fasta fai file.
pod2usage(-msg => "\nERROR: reference option (-r) does not appear to be a fasta index file.\n", -verbose => 2, -output => \*STDERR) unless($opts{'reference'} =~ m/\.fai$/);
Expand All @@ -231,6 +248,9 @@ sub setup {
PCAP::Cli::file_for_reading('germline-indel-bed',$opts{'germindel'});
PCAP::Cli::out_dir_check('outdir', $opts{'outdir'});

PCAP::Cli::file_for_reading('flagConfig',$opts{'flagConfig'}) if(defined $opts{'flagConfig'});
PCAP::Cli::file_for_reading('flagToVcfConfig',$opts{'flagToVcfConfig'}) if(defined $opts{'flagToVcfConfig'});

delete $opts{'process'} unless(defined $opts{'process'});
delete $opts{'index'} unless(defined $opts{'index'});
delete $opts{'limit'} unless(defined $opts{'limit'});
Expand Down Expand Up @@ -299,10 +319,10 @@ sub setup {
make_path($progress) unless(-d $progress);
#Directory to store run logs.
my $logs;
if($opts{'lgs'}){
if(defined $opts{'lgs'}){
$logs = $opts{'lgs'};
}else{
$logs = File::Spec->catdir($opts{'outdir'}, 'logs');
$logs = File::Spec->catdir($opts{'tmp'}, 'logs');
}
make_path($logs) unless(-d $logs);
$opts{'logs'} = $logs;
Expand Down Expand Up @@ -348,8 +368,9 @@ =head1 SYNOPSIS
-species -s Species name for (output in VCF)
-species-assembly -sa Species assembly for (output in VCF)
-flag-bed-files -b Bed file location for flagging (eg dbSNP.bed NB must be sorted.)
-germline-indel -in Location of germline indel bedfile
-unmatched-vcf -u Directory containing unmatched normal VCF files
-germline-indel -in Location of germline indel bedfile
-unmatched-vcf -u Directory containing unmatched normal VCF files
-seqType -st Sequencing type (genomic|pulldown)
Optional parameters:
-normal-contamination -k Normal contamination value (default 0.1)
Expand All @@ -358,14 +379,23 @@ =head1 SYNOPSIS
-logs -g Location to write logs (default is ./logs)
-normal-protocol -np Normal protocol [WGS|WXS|RNA] (default WGS)
-tumour-protocol -tp Tumour protocol [WGS|WXS|RNA] (default WGS)
-normal-contamination -k Normal contamination value (default 0.1)
-threads -t Number of threads allowed on this machine (default 1)
-limit -l Limit the number of jobs required for m/estep (default undef)
-logs -g Location to write logs (default is ./logs)
Optional flagging parameters: [default to those found in cgpCaVEManPostProcessing]
-flagConfig -c Config ini file to use for flag list and settings
-flagToVcfConfig -f Config::IniFiles style config file containing VCF flag code to flag
name conversions
Targeted processing (further detail under OPTIONS):
-process -p Only process this step then exit, optionally set -index
-index -i Optionally restrict '-p' to single job
-process -p Only process this step then exit, optionally set -index
-index -i Optionally restrict '-p' to single job
Other:
-help -h Brief help message.
-man -m Full documentation.
Other:
-help -h Brief help message.
-man -m Full documentation.
=head1 OPTIONS
Expand Down
Binary file modified docs.tar.gz
Binary file not shown.
64 changes: 42 additions & 22 deletions lib/Sanger/CGP/Caveman/Implement.pm
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ sub caveman_setup {

PCAP::Threaded::external_process_handler(File::Spec->catdir($tmp, 'logs'), $command, 0);

return PCAP::Threaded::touch_success(File::Spec->catdir($tmp, 'progress'), 'caveman_setup', 0);
return PCAP::Threaded::touch_success(File::Spec->catdir($tmp, 'progress'), 0);
}

sub caveman_split {
Expand All @@ -109,13 +109,13 @@ sub caveman_split {
return 1 if(exists $options->{'index'} && $index != $options->{'index'});
my $tmp = $options->{'tmp'};
my $config = $options->{'cave_cfg'};
return 1 if PCAP::Threaded::success_exists(File::Spec->catdir($tmp, 'progress'), 'caveman_split', $index);
return 1 if PCAP::Threaded::success_exists(File::Spec->catdir($tmp, 'progress'), $index);

my $command = _which('caveman') || die "Unable to find 'caveman' in path";
$command .= sprintf($CAVEMAN_SPLIT,$index,$config);

PCAP::Threaded::external_process_handler(File::Spec->catdir($tmp, 'logs'), $command, $index);
return PCAP::Threaded::touch_success(File::Spec->catdir($tmp, 'progress'), 'caveman_split', $index);
return PCAP::Threaded::touch_success(File::Spec->catdir($tmp, 'progress'), $index);
}

sub caveman_merge{
Expand All @@ -127,13 +127,13 @@ sub caveman_merge{
my $config = $options->{'cave_cfg'};
my $prob_arr = $options->{'cave_parr'};
my $cov_arr = $options->{'cave_carr'};
return 1 if PCAP::Threaded::success_exists(File::Spec->catdir($tmp, 'progress'), 'caveman_merge', 0);
return 1 if PCAP::Threaded::success_exists(File::Spec->catdir($tmp, 'progress'), 0);

$command .= sprintf($CAVEMAN_MERGE, $cov_arr, $prob_arr,$config);

PCAP::Threaded::external_process_handler(File::Spec->catdir($tmp, 'logs'), $command, 0);

return PCAP::Threaded::touch_success(File::Spec->catdir($tmp, 'progress'), 'caveman_merge', 0);
return PCAP::Threaded::touch_success(File::Spec->catdir($tmp, 'progress'), 0);
}

sub caveman_mstep{
Expand All @@ -147,7 +147,7 @@ sub caveman_mstep{
my $config = $options->{'cave_cfg'};
my $tmp = $options->{'tmp'};
for my $index(@indicies) {
next if PCAP::Threaded::success_exists(File::Spec->catdir($tmp, 'progress'), 'caveman_mstep', $index);
next if PCAP::Threaded::success_exists(File::Spec->catdir($tmp, 'progress'), $index);

my $command = _which('caveman') || die "Unable to find 'caveman' in path";

Expand All @@ -156,7 +156,7 @@ sub caveman_mstep{
$config);

PCAP::Threaded::external_process_handler(File::Spec->catdir($tmp, 'logs'), $command, $index);
PCAP::Threaded::touch_success(File::Spec->catdir($tmp, 'progress'), 'caveman_mstep', $index);
PCAP::Threaded::touch_success(File::Spec->catdir($tmp, 'progress'), $index);
}
return 1;
}
Expand All @@ -177,7 +177,7 @@ sub caveman_estep{
my $tumprot = $options->{'tumprot'};

for my $index(@indicies) {
next if PCAP::Threaded::success_exists(File::Spec->catdir($tmp, 'progress'), 'caveman_estep', $index);
next if PCAP::Threaded::success_exists(File::Spec->catdir($tmp, 'progress'), $index);

my $command = _which('caveman') || die "Unable to find 'caveman' in path";

Expand All @@ -195,7 +195,7 @@ sub caveman_estep{
$tumprot);

PCAP::Threaded::external_process_handler(File::Spec->catdir($tmp, 'logs'), $command, $index);
PCAP::Threaded::touch_success(File::Spec->catdir($tmp, 'progress'), 'caveman_estep', $index);
PCAP::Threaded::touch_success(File::Spec->catdir($tmp, 'progress'), $index);
}
return 1;
}
Expand Down Expand Up @@ -225,20 +225,20 @@ sub caveman_merge_results {

sub caveman_add_vcf_ids{
# uncoverable subroutine
my $options = shift;
my ($options, $snps_or_muts) = @_;
my $tmp = $options->{'tmp'};
my $raw = $options->{'raw_file'};
my $ids = $options->{'ids_file'};

return 1 if PCAP::Threaded::success_exists(File::Spec->catdir($tmp, 'progress'), 'caveman_add_vcf_ids', 0);
return 1 if PCAP::Threaded::success_exists(File::Spec->catdir($tmp, 'progress'), $snps_or_muts);

my $command = _which($IDS_SCRIPT) || die "Unable to find '$IDS_SCRIPT' in path";
my $command = $^X.' '._which($IDS_SCRIPT) || die "Unable to find '$IDS_SCRIPT' in path";
$command .= sprintf($CAVEMAN_VCF_IDS,
$raw,
$ids);

PCAP::Threaded::external_process_handler(File::Spec->catdir($tmp, 'logs'), $command, 0);
return PCAP::Threaded::touch_success(File::Spec->catdir($tmp, 'progress'), 'caveman_add_vcf_ids', 0);
PCAP::Threaded::external_process_handler(File::Spec->catdir($tmp, 'logs'), $command, $snps_or_muts);
return PCAP::Threaded::touch_success(File::Spec->catdir($tmp, 'progress'), $snps_or_muts);
}

sub caveman_flag{
Expand All @@ -251,10 +251,10 @@ sub caveman_flag{
my $normbam = $options->{'normbam'};
my $ref = $options->{'reference'};

return 1 if PCAP::Threaded::success_exists(File::Spec->catdir($tmp, 'progress'), 'caveman_flag', 0);
return 1 if PCAP::Threaded::success_exists(File::Spec->catdir($tmp, 'progress'), 0);

my $command = _which($FLAG_SCRIPT) || die "Unable to find '$FLAG_SCRIPT' in path";
$command .= sprintf($CAVEMAN_FLAG,
my $flag = $^X.' '._which($FLAG_SCRIPT) || die "Unable to find '$FLAG_SCRIPT' in path";
$flag .= sprintf($CAVEMAN_FLAG,
$for_flagging,
$flagged,
$options->{'species'},
Expand All @@ -263,10 +263,30 @@ sub caveman_flag{
$options->{'flag-bed'},
$options->{'germindel'},
$options->{'unmatchedvcf'},
$ref);
$ref,
$options->{'seqType'},
);
$flag .= ' -c '.$options->{'flagConfig'} if(defined $options->{'flagConfig'});
$flag .= ' -v '.$options->{'flagToVcfConfig'} if(defined $options->{'flagToVcfConfig'});

PCAP::Threaded::external_process_handler(File::Spec->catdir($tmp, 'logs'), $command, 0);
return PCAP::Threaded::touch_success(File::Spec->catdir($tmp, 'progress'), 'caveman_flag', 0);
my $vcf_gz = $flagged.'.gz';
my $bgzip = _which('bgzip');
$bgzip .= sprintf ' -c %s > %s', $flagged, $vcf_gz;

my $tabix = _which('tabix');
$tabix .= sprintf ' -p vcf %s', $vcf_gz;

my $vcf_snps_gz = $options->{'ids_snps_file'}.'.gz';
my $bgzip_snps = _which('bgzip');
$bgzip_snps .= sprintf ' -c %s > %s', $options->{'ids_snps_file'}, $vcf_snps_gz;

my $tabix_snps = _which('tabix');
$tabix_snps .= sprintf ' -p vcf %s', $vcf_snps_gz;

my @commands = ($flag, $bgzip, $tabix, $bgzip_snps, $tabix_snps);

PCAP::Threaded::external_process_handler(File::Spec->catdir($tmp, 'logs'), \@commands, 0);
return PCAP::Threaded::touch_success(File::Spec->catdir($tmp, 'progress'), 0);
}

sub limited_flag_indicies {
Expand Down Expand Up @@ -303,11 +323,11 @@ sub concat {
my $tmp = $options->{'tmp'};
my $out = $options->{'out_file'};
my $target = $options->{'target_files'};
return 1 if PCAP::Threaded::success_exists(File::Spec->catdir($tmp, 'progress'), 'caveman_concat_split', 0);
return 1 if PCAP::Threaded::success_exists(File::Spec->catdir($tmp, 'progress'), 0);
my $command = sprintf('cat %s > %s',$target,$out);
PCAP::Threaded::external_process_handler(File::Spec->catdir($tmp, 'logs'), $command, 0);

return PCAP::Threaded::touch_success(File::Spec->catdir($tmp, 'progress'), 'caveman_concat_split', 0);
return PCAP::Threaded::touch_success(File::Spec->catdir($tmp, 'progress'), 0);

}

Expand Down
8 changes: 4 additions & 4 deletions setup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
##########LICENCE##########

CAVEMAN_CORE="https://github.com/cancerit/CaVEMan/archive/1.2.5.tar.gz"
CAVEMAN_CORE="https://github.com/cancerit/CaVEMan/archive/1.2.6.tar.gz"
SOURCE_SAMTOOLS="https://github.com/samtools/samtools/archive/0.1.19.tar.gz"


Expand Down Expand Up @@ -128,13 +128,13 @@ if [ -e $SETUP_DIR/caveman.success ]; then
else
cd $SETUP_DIR
(
set -x
set -xe
if [ ! -e caveman ]; then
if [ ! -e $INIT_DIR/CaVEMan-1.2.5.tar.gz ]; then
if [ ! -e $INIT_DIR/CaVEMan-1.2.6.tar.gz ]; then
get_distro "caveman" $CAVEMAN_CORE
else
mkdir -p caveman
tar --strip-components 1 -C caveman -zxf $INIT_DIR/CaVEMan-1.2.5.tar.gz
tar --strip-components 1 -C caveman -zxf $INIT_DIR/CaVEMan-1.2.6.tar.gz
fi
fi
make -C caveman clean
Expand Down

0 comments on commit 9c4bd9a

Please sign in to comment.