diff --git a/bin/svParser.pm b/bin/svParser.pm index 2a9d14f..a332baa 100755 --- a/bin/svParser.pm +++ b/bin/svParser.pm @@ -272,8 +272,17 @@ sub lumpy { } } - if ( $filter_flags{'s'} ){ - $filters = somatic_filter( $id, $tumour, $control, $normals, $filters, \%PON_info ); + if ( $filter_flags{'st'} ){ + $filters = genotype_filter( $id, $genotype, $filters, 'somatic_tumour' ); + } + elsif ( $filter_flags{'sn'} ){ + $filters = genotype_filter( $id, $genotype, $filters, 'somatic_normal' ); + } + elsif ( $filter_flags{'gp'} ){ + $filters = genotype_filter( $id, $genotype, $filters, 'germline_private' ); + } + elsif ( $filter_flags{'gr'} ){ + $filters = genotype_filter( $id, $genotype, $filters, 'germline_recurrent' ); } my @samples = @{ $samples }; @@ -356,7 +365,7 @@ sub lumpy { my @filter_reasons = @{ $filters }; - ### This might be useless - isn't this happening in somatic_filter() ? + ### This might be useless - isn't this happening in genotype_filter() ? if (@samples > 1){ # In case there are no control samples... # for precise variants: if ($info_block !~ /IMPRECISE;/) { @@ -621,8 +630,8 @@ sub delly { $PON_info{$normal} = [ ($sample_info{$id}{$normal}{'DV'} + $sample_info{$id}{$normal}{'RV'}) , $sample_info{$id}{$normal}{'GT'} ]; } - if ( $filter_flags{'s'} ){ - $filters = somatic_filter( $id, $tumour_name, $control_name, $normals, $filters, \%PON_info ); + if ( $filter_flags{'st'} ){ + $filters = genotype_filter( $id, $genotype, $filters, 'somatic_tumour' ); } if ( $filter_flags{'su'} ){ @@ -642,20 +651,14 @@ sub delly { sub summarise_variants { my ( $SVs, $filter_switch, $region ) = @_; - my ($dels, $dups, $trans, $invs, $filtered) = (0,0,0,0,0); my ($tds, $CNVs, $ins) = (0,0,0); my ( $chromosome, $query_start, $query_stop, $specified_region) = parseChrom($region) if $region; - my %support_by_chrom; - my $read_support; - my %filtered_sv; - my %sv_type_count; - my @gens = qw/germline_recurrent germline_private somatic_tumour somatic_normal/; my @types = qw/DEL DUP INV BND TRA DUP:TANDEM CNV INS /; @@ -666,13 +669,11 @@ sub summarise_variants { } for (keys %{ $SVs } ){ - my ( $chr, $start, $id, $ref, $alt, $quality_score, $filt, $info_block, $format_block, $tumour_info_block, $normal_info_block, $sv_type, $SV_length, $stop, $chr2, $SR, $PE, $ab, $filters, $genotype, $samples ) = @{ $SVs->{$_} }; if ( $chromosome ){ next if $chr ne $chromosome; } - if ( $specified_region ){ if ( ( $start < $query_start or $start > $query_stop ) and ( $stop < $query_stop or $stop > $query_stop ) ){ next; @@ -693,12 +694,9 @@ sub summarise_variants { next if $filter_switch; } - $read_support = ( $SR + $PE ); $support_by_chrom{$id} = [ $read_support, $sv_type, $chr, $SV_length, $start ]; - $sv_type_count{$genotype}{$sv_type}++; - } print "\n"; @@ -1162,23 +1160,12 @@ sub read_depth_filter { } -sub somatic_filter { - my ($id, $tumour_name, $control_name, $PON, $filter_reasons, $PON_info) = @_; - - my %PON_info = % { $PON_info }; +sub genotype_filter { + my ($id, $genotype, $filter_reasons, $selected_genotype ) = @_; my @filter_reasons = @{ $filter_reasons }; - - for my $normal (keys %PON_info) { - - if ( $PON_info{$normal}[1] eq '1/1' or $PON_info{$normal}[1] eq '0/1' ){ - push @filter_reasons, "PON member '$normal' not homozygous for reference genotype=" . $PON_info{$normal}[1]; - } - # Filter if any quality reads support var in any normal in PON - if ( $PON_info{$normal}[0] != 0){ - push @filter_reasons, "PON member '$normal' has quality support for alternative genotype=" . $PON_info{$normal}[0]; + if ( $genotype ne $selected_genotype ){ + push @filter_reasons, "Not $selected_genotype: genotype=" . $genotype; } - - } return(\@filter_reasons); } diff --git a/script/svParse.pl b/script/svParse.pl index ec9730a..6c276f1 100755 --- a/script/svParse.pl +++ b/script/svParse.pl @@ -12,6 +12,7 @@ use feature qw/ say /; use Data::Dumper; +use Data::Printer; use File::Basename; use File::Path qw/ make_path / ; use File::Slurp; @@ -25,7 +26,7 @@ my $chromosome; my $method = "guess"; my $print; -my $exclude; +my $exclude = ""; my %filters; my $PON_print = 5; my %opt; @@ -91,42 +92,28 @@ } } -my $filter_switch= 0; - -if ( scalar keys %filters > 0 ){ - print "\n"; - if ( exists $filters{'a'} ){ - $filter_switch = 1; - my $filter_ref = allFilters($exclude, \@keys, \%filters); - } - elsif ( $filters{'su'} or $filters{'dp'} or $filters{'rdr'} or $filters{'sq'} or $filters{'chr'} or $filters{'g'} or $filters{'e'} or $filters{'n'} or $filters{'s'} ) { - say "Running in filter mode, using custom filters:"; - say " o Read support >= $filters{'su'}" if $filters{'su'}; - say " o Read depth (in both tumour and normal) > $filters{'dp'}" if $filters{'dp'}; - say " o Read support / depth > $filters{'rdr'}" if $filters{'rdr'}; - say " o SQ quality > $filters{'sq'}" if $filters{'sq'}; - say " o Chromosomes: " . join(' ', @keys) if $filters{'chr'}; - say " o Running in germline mode" if $filters{'g'}; - say " o Running in somatic TUMOUR mode" if $filters{'s'}; - say " o Running in somatic NORMAL mode" if $filters{'n'}; - say " o Excluding calls overlapping: $exclude" if $filters{'e'}; - $filter_switch = 1; - } - else { - my $illegals = join(",", keys %filters); - say "Illegal filter option used: '$illegals'. Please specify filters to run with (or use '-f or -f a' to run all defaults)"; - say "Filter options available:"; - say " o Read support: su=INT"; - say " o Read depth: dp=INT"; - say " o Read support / depth: rdr=FLOAT"; - say " o SQ quality: sq=INT"; - say " o Chromosomes: " . join(' ', @keys); - say " o Germline only: g=1"; - say " o Exclude calls in bed file: e [bed file]"; - die "Please check filter specification\n"; - } +my %legalOpts = ( 'su' => 1, + 'dp' => 1, + 'rdr' => 1, + 'sq' => 1, + 'chr' => 1, + 'gr' => 1, + 'gp' => 1, + 'st' => 1, + 'sn' => 1, + 'e' => 1, + 'a' => 1 +); + +my $filter_switch = 0; +my $filter_ref; + +if ( exists $filters{'a'} ){ + $filter_ref = allFilters(\%filters); + %filters = %{ $filter_ref }; } +($filter_switch, $filter_ref) = checkFilters(\%filters, \%legalOpts, 0, $exclude, \@keys) if scalar keys %filters > 0; my ($name, $extention) = split(/\.([^.]+)$/, basename($vcf_file), 2); print "\n"; @@ -148,21 +135,13 @@ # Write out variants passing filters # Write out some useful info to txt file - if ( $filters{'g'} ){ - print "Printing germline events\n"; - svParser::print_variants( $SVs, $filtered_vars, $name, $filtered_out, 1 ) if $print; - svParser::write_summary( $SVs, $name, $summary_out, $method, 1) if $print; - } - else { - svParser::print_variants( $SVs, $filtered_vars, $name, $filtered_out, 0 ) if $print; - svParser::write_summary( $SVs, $name, $summary_out, $method, 0 ) if $print; - } + svParser::print_variants( $SVs, $filtered_vars, $name, $filtered_out, 0 ) if $print; + svParser::write_summary( $SVs, $name, $summary_out, $method, 0 ) if $print; } if ($method eq 'snp') { # Dump all variants to screen svParser::dump_variants( $SVs, $info, $filter_switch, $chromosome, $method) if $dump; - if ($print or $id ){ die "Print and get variants not supported for SNP data\n"; } @@ -200,21 +179,58 @@ sub testCalls { sub allFilters { - my ($exclude_file, $chroms, $f) = @_; + my $f = shift; my %filters = %{$f}; - say "Running in filter mode, using all default filters:"; - say " o Read support >= 4"; - say " o Read depth (in both tumour and normal) > 10"; - say " o Read support / depth > 0.1"; - say " o SQ quality > 10"; - say " o Chromosomes: " . join(' ', @{$chroms} ); - say " o Excldung calls in regions: $exclude_file"; - - $filters{'su'} = 4; + $filters{'a'} = 1; + $filters{'su'} = 4; + $filters{'dp'} = 10; + $filters{'rdr'} = 0.1; + $filters{'sq'} = 10; + $filters{'chr'} = 1; return(\%filters) } +sub checkFilters { + my ($filter_given, $legalOpts, $filter_switch, $exclude, $chroms) = @_; + my %legalOpts = %{$legalOpts}; + my %filters = %{$filter_given}; + say "Running in filter mode, using custom filters:"; + + for my $k (keys %{ $filter_given } ){ + if (exists $legalOpts{$k}){ + explainFilters(\%legalOpts, $k, $exclude, $chroms); + $filter_switch = 1; + } + else { + printIllegals($k); + } + } + return($filter_switch, $filter_given); +} + +sub explainFilters { + my ($legals, $filter_given, $exclude, $chroms) = @_; + my %legalOpts = %{ $legals }; + say " o Read support >= $legalOpts{'su'}" if $filter_given eq 'su'; + say " o Read depth (in both tumour and normal) > $legalOpts{'dp'}" if $filter_given eq 'dp'; + say " o Read support / depth > $legalOpts{'rdr'}" if $filter_given eq 'rdr'; + say " o SQ quality > $legalOpts{'sq'}" if $filter_given eq 'sq'; + say " o Chromosomes: " . join(' ', @{$chroms}) if $filter_given eq 'chr'; + say " o Running in germline PRIVATE mode" if $filter_given eq 'gp'; + say " o Running in germline RECURRANT mode" if $filter_given eq 'gr'; + say " o Running in somatic TUMOUR mode" if $filter_given eq 'st'; + say " o Running in somatic NORMAL mode" if $filter_given eq 'sn'; + say " o Excluding calls overlapping: $exclude" if $filter_given eq 'e'; +} + +sub printIllegals { + my $illegal_option = shift; + say "Illegal filter option used: '$illegal_option'. Please specify filters to run with (or use '-f or -f a' to run all defaults)"; + die usage(); +} + + sub usage { print " @@ -251,7 +267,10 @@ sub usage { -f rdr=FLOAT [fraction of supporting reads/tumour depth - a value of 1 would mean all reads support variant] -f sq=INT [phred-scaled variant likelihood] -f chr=1 [only show chromosomes in 'chroms.txt'. [Default use Drosophila chroms: 2L 2R 3L 3R 4 X Y] - -f s=1 [only keep somatic tumour events] + -f st=1 [only keep somatic tumour events] + -f sn=1 [only keep somatic normal events] + -f gp=1 [only keep germline private events] + -f gr=1 [only keep germline recurrent events] -f, -f a [apply default filters: -f su=4 -f dp=10 -f rdr=0.1 -f sq=10 -f chr=1 ] " }