Skip to content

Commit

Permalink
Clean up option parsing; now allow filtering on genotype
Browse files Browse the repository at this point in the history
  • Loading branch information
nriddiford committed Jun 19, 2018
1 parent 238003e commit d6fbbf6
Show file tree
Hide file tree
Showing 2 changed files with 93 additions and 87 deletions.
49 changes: 18 additions & 31 deletions bin/svParser.pm
Original file line number Diff line number Diff line change
Expand Up @@ -272,8 +272,17 @@ sub lumpy {
}
}

if ( $filter_flags{'s'} ){
$filters = somatic_filter( $id, $tumour, $control, $normals, $filters, \%PON_info );
if ( $filter_flags{'st'} ){
$filters = genotype_filter( $id, $genotype, $filters, 'somatic_tumour' );
}
elsif ( $filter_flags{'sn'} ){
$filters = genotype_filter( $id, $genotype, $filters, 'somatic_normal' );
}
elsif ( $filter_flags{'gp'} ){
$filters = genotype_filter( $id, $genotype, $filters, 'germline_private' );
}
elsif ( $filter_flags{'gr'} ){
$filters = genotype_filter( $id, $genotype, $filters, 'germline_recurrent' );
}

my @samples = @{ $samples };
Expand Down Expand Up @@ -356,7 +365,7 @@ sub lumpy {

my @filter_reasons = @{ $filters };

### This might be useless - isn't this happening in somatic_filter() ?
### This might be useless - isn't this happening in genotype_filter() ?
if (@samples > 1){ # In case there are no control samples...
# for precise variants:
if ($info_block !~ /IMPRECISE;/) {
Expand Down Expand Up @@ -621,8 +630,8 @@ sub delly {
$PON_info{$normal} = [ ($sample_info{$id}{$normal}{'DV'} + $sample_info{$id}{$normal}{'RV'}) , $sample_info{$id}{$normal}{'GT'} ];
}

if ( $filter_flags{'s'} ){
$filters = somatic_filter( $id, $tumour_name, $control_name, $normals, $filters, \%PON_info );
if ( $filter_flags{'st'} ){
$filters = genotype_filter( $id, $genotype, $filters, 'somatic_tumour' );
}

if ( $filter_flags{'su'} ){
Expand All @@ -642,20 +651,14 @@ sub delly {

sub summarise_variants {
my ( $SVs, $filter_switch, $region ) = @_;

my ($dels, $dups, $trans, $invs, $filtered) = (0,0,0,0,0);
my ($tds, $CNVs, $ins) = (0,0,0);

my ( $chromosome, $query_start, $query_stop, $specified_region) = parseChrom($region) if $region;

my %support_by_chrom;

my $read_support;

my %filtered_sv;

my %sv_type_count;

my @gens = qw/germline_recurrent germline_private somatic_tumour somatic_normal/;
my @types = qw/DEL DUP INV BND TRA DUP:TANDEM CNV INS /;

Expand All @@ -666,13 +669,11 @@ sub summarise_variants {
}

for (keys %{ $SVs } ){

my ( $chr, $start, $id, $ref, $alt, $quality_score, $filt, $info_block, $format_block, $tumour_info_block, $normal_info_block, $sv_type, $SV_length, $stop, $chr2, $SR, $PE, $ab, $filters, $genotype, $samples ) = @{ $SVs->{$_} };

if ( $chromosome ){
next if $chr ne $chromosome;
}

if ( $specified_region ){
if ( ( $start < $query_start or $start > $query_stop ) and ( $stop < $query_stop or $stop > $query_stop ) ){
next;
Expand All @@ -693,12 +694,9 @@ sub summarise_variants {
next if $filter_switch;
}


$read_support = ( $SR + $PE );
$support_by_chrom{$id} = [ $read_support, $sv_type, $chr, $SV_length, $start ];

$sv_type_count{$genotype}{$sv_type}++;

}
print "\n";

Expand Down Expand Up @@ -1162,23 +1160,12 @@ sub read_depth_filter {
}


sub somatic_filter {
my ($id, $tumour_name, $control_name, $PON, $filter_reasons, $PON_info) = @_;

my %PON_info = % { $PON_info };
sub genotype_filter {
my ($id, $genotype, $filter_reasons, $selected_genotype ) = @_;
my @filter_reasons = @{ $filter_reasons };

for my $normal (keys %PON_info) {

if ( $PON_info{$normal}[1] eq '1/1' or $PON_info{$normal}[1] eq '0/1' ){
push @filter_reasons, "PON member '$normal' not homozygous for reference genotype=" . $PON_info{$normal}[1];
}
# Filter if any quality reads support var in any normal in PON
if ( $PON_info{$normal}[0] != 0){
push @filter_reasons, "PON member '$normal' has quality support for alternative genotype=" . $PON_info{$normal}[0];
if ( $genotype ne $selected_genotype ){
push @filter_reasons, "Not $selected_genotype: genotype=" . $genotype;
}

}
return(\@filter_reasons);
}

Expand Down
131 changes: 75 additions & 56 deletions script/svParse.pl
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@

use feature qw/ say /;
use Data::Dumper;
use Data::Printer;
use File::Basename;
use File::Path qw/ make_path / ;
use File::Slurp;
Expand All @@ -25,7 +26,7 @@
my $chromosome;
my $method = "guess";
my $print;
my $exclude;
my $exclude = "";
my %filters;
my $PON_print = 5;
my %opt;
Expand Down Expand Up @@ -91,42 +92,28 @@
}
}

my $filter_switch= 0;

if ( scalar keys %filters > 0 ){
print "\n";
if ( exists $filters{'a'} ){
$filter_switch = 1;
my $filter_ref = allFilters($exclude, \@keys, \%filters);
}
elsif ( $filters{'su'} or $filters{'dp'} or $filters{'rdr'} or $filters{'sq'} or $filters{'chr'} or $filters{'g'} or $filters{'e'} or $filters{'n'} or $filters{'s'} ) {
say "Running in filter mode, using custom filters:";
say " o Read support >= $filters{'su'}" if $filters{'su'};
say " o Read depth (in both tumour and normal) > $filters{'dp'}" if $filters{'dp'};
say " o Read support / depth > $filters{'rdr'}" if $filters{'rdr'};
say " o SQ quality > $filters{'sq'}" if $filters{'sq'};
say " o Chromosomes: " . join(' ', @keys) if $filters{'chr'};
say " o Running in germline mode" if $filters{'g'};
say " o Running in somatic TUMOUR mode" if $filters{'s'};
say " o Running in somatic NORMAL mode" if $filters{'n'};
say " o Excluding calls overlapping: $exclude" if $filters{'e'};
$filter_switch = 1;
}
else {
my $illegals = join(",", keys %filters);
say "Illegal filter option used: '$illegals'. Please specify filters to run with (or use '-f or -f a' to run all defaults)";
say "Filter options available:";
say " o Read support: su=INT";
say " o Read depth: dp=INT";
say " o Read support / depth: rdr=FLOAT";
say " o SQ quality: sq=INT";
say " o Chromosomes: " . join(' ', @keys);
say " o Germline only: g=1";
say " o Exclude calls in bed file: e [bed file]";
die "Please check filter specification\n";
}
my %legalOpts = ( 'su' => 1,
'dp' => 1,
'rdr' => 1,
'sq' => 1,
'chr' => 1,
'gr' => 1,
'gp' => 1,
'st' => 1,
'sn' => 1,
'e' => 1,
'a' => 1
);

my $filter_switch = 0;
my $filter_ref;

if ( exists $filters{'a'} ){
$filter_ref = allFilters(\%filters);
%filters = %{ $filter_ref };
}

($filter_switch, $filter_ref) = checkFilters(\%filters, \%legalOpts, 0, $exclude, \@keys) if scalar keys %filters > 0;
my ($name, $extention) = split(/\.([^.]+)$/, basename($vcf_file), 2);

print "\n";
Expand All @@ -148,21 +135,13 @@

# Write out variants passing filters
# Write out some useful info to txt file
if ( $filters{'g'} ){
print "Printing germline events\n";
svParser::print_variants( $SVs, $filtered_vars, $name, $filtered_out, 1 ) if $print;
svParser::write_summary( $SVs, $name, $summary_out, $method, 1) if $print;
}
else {
svParser::print_variants( $SVs, $filtered_vars, $name, $filtered_out, 0 ) if $print;
svParser::write_summary( $SVs, $name, $summary_out, $method, 0 ) if $print;
}
svParser::print_variants( $SVs, $filtered_vars, $name, $filtered_out, 0 ) if $print;
svParser::write_summary( $SVs, $name, $summary_out, $method, 0 ) if $print;
}

if ($method eq 'snp') {
# Dump all variants to screen
svParser::dump_variants( $SVs, $info, $filter_switch, $chromosome, $method) if $dump;

if ($print or $id ){
die "Print and get variants not supported for SNP data\n";
}
Expand Down Expand Up @@ -200,21 +179,58 @@ sub testCalls {


sub allFilters {
my ($exclude_file, $chroms, $f) = @_;
my $f = shift;
my %filters = %{$f};
say "Running in filter mode, using all default filters:";
say " o Read support >= 4";
say " o Read depth (in both tumour and normal) > 10";
say " o Read support / depth > 0.1";
say " o SQ quality > 10";
say " o Chromosomes: " . join(' ', @{$chroms} );
say " o Excldung calls in regions: $exclude_file";

$filters{'su'} = 4;
$filters{'a'} = 1;
$filters{'su'} = 4;
$filters{'dp'} = 10;
$filters{'rdr'} = 0.1;
$filters{'sq'} = 10;
$filters{'chr'} = 1;
return(\%filters)
}


sub checkFilters {
my ($filter_given, $legalOpts, $filter_switch, $exclude, $chroms) = @_;
my %legalOpts = %{$legalOpts};
my %filters = %{$filter_given};
say "Running in filter mode, using custom filters:";

for my $k (keys %{ $filter_given } ){
if (exists $legalOpts{$k}){
explainFilters(\%legalOpts, $k, $exclude, $chroms);
$filter_switch = 1;
}
else {
printIllegals($k);
}
}
return($filter_switch, $filter_given);
}

sub explainFilters {
my ($legals, $filter_given, $exclude, $chroms) = @_;
my %legalOpts = %{ $legals };
say " o Read support >= $legalOpts{'su'}" if $filter_given eq 'su';
say " o Read depth (in both tumour and normal) > $legalOpts{'dp'}" if $filter_given eq 'dp';
say " o Read support / depth > $legalOpts{'rdr'}" if $filter_given eq 'rdr';
say " o SQ quality > $legalOpts{'sq'}" if $filter_given eq 'sq';
say " o Chromosomes: " . join(' ', @{$chroms}) if $filter_given eq 'chr';
say " o Running in germline PRIVATE mode" if $filter_given eq 'gp';
say " o Running in germline RECURRANT mode" if $filter_given eq 'gr';
say " o Running in somatic TUMOUR mode" if $filter_given eq 'st';
say " o Running in somatic NORMAL mode" if $filter_given eq 'sn';
say " o Excluding calls overlapping: $exclude" if $filter_given eq 'e';
}

sub printIllegals {
my $illegal_option = shift;
say "Illegal filter option used: '$illegal_option'. Please specify filters to run with (or use '-f or -f a' to run all defaults)";
die usage();
}


sub usage {
print
"
Expand Down Expand Up @@ -251,7 +267,10 @@ sub usage {
-f rdr=FLOAT [fraction of supporting reads/tumour depth - a value of 1 would mean all reads support variant]
-f sq=INT [phred-scaled variant likelihood]
-f chr=1 [only show chromosomes in 'chroms.txt'. [Default use Drosophila chroms: 2L 2R 3L 3R 4 X Y]
-f s=1 [only keep somatic tumour events]
-f st=1 [only keep somatic tumour events]
-f sn=1 [only keep somatic normal events]
-f gp=1 [only keep germline private events]
-f gr=1 [only keep germline recurrent events]
-f, -f a [apply default filters: -f su=4 -f dp=10 -f rdr=0.1 -f sq=10 -f chr=1 ]
"
}

0 comments on commit d6fbbf6

Please sign in to comment.