From 079c1280cabc658c537ada8e6bd8c23d661aa12c Mon Sep 17 00:00:00 2001 From: Lucas Czech Date: Thu, 1 Aug 2024 15:07:24 -0400 Subject: [PATCH] Add window averaging with a provided mask #24 --- src/commands/analyze/diversity.cpp | 14 +- src/commands/analyze/fst.cpp | 13 +- src/commands/analyze/fst_cathedral.cpp | 4 +- src/options/diversity_processor.cpp | 3 +- src/options/diversity_processor.hpp | 11 +- src/options/fst_processor.cpp | 66 +++++++-- src/options/fst_processor.hpp | 14 +- src/options/variant_filter_mask.cpp | 1 - src/options/variant_input.hpp | 8 + src/options/window_average.cpp | 194 ++++++++++++++++++++++++- src/options/window_average.hpp | 36 ++++- 11 files changed, 338 insertions(+), 26 deletions(-) diff --git a/src/commands/analyze/diversity.cpp b/src/commands/analyze/diversity.cpp index 3add793..cbb5ec3 100644 --- a/src/commands/analyze/diversity.cpp +++ b/src/commands/analyze/diversity.cpp @@ -88,7 +88,9 @@ void setup_diversity( CLI::App& app ) // ------------------------------------------------------------------------- // Diversity Processor - options->diversity_processor.add_diversity_processor_opts_to_app( sub ); + options->diversity_processor.add_diversity_processor_opts_to_app( + sub, options->variant_input.get_variant_reference_genome_options() + ); // Settings: Omit Empty Windows options->no_extra_columns.option = sub->add_flag( @@ -162,6 +164,9 @@ void setup_diversity( CLI::App& app ) */ struct DiversityCommandState { + // Provided loci, if needed for window averaging + std::shared_ptr provided_loci; + // Which values are actually being computed? bool compute_theta_pi; bool compute_theta_wa; @@ -193,6 +198,9 @@ DiversityCommandState prepare_output_data_( ) { DiversityCommandState output; + // Provided loci, if needed for window averaging + output.provided_loci = options.diversity_processor.get_provided_loci(); + // Get the measures to compute. At least one of them will be active. output.compute_theta_pi = options.diversity_processor.compute_theta_pi(); output.compute_theta_wa = options.diversity_processor.compute_theta_watterson(); @@ -382,7 +390,7 @@ void write_output_popoolation_line_( }; // Get the results for the whole set of calculators. - auto const results = processor.get_result( window, nullptr ); // TODO provided_loci + auto const results = processor.get_result( window, output.provided_loci ); auto const total_stats = variant_filter_stats_category_counts( processor.get_filter_stats() ); @@ -479,7 +487,7 @@ void write_output_table_line_( } // Get the results for the whole set of calculators. - auto const results = processor.get_result( window, nullptr ); // TODO provided_loci + auto const results = processor.get_result( window, output.provided_loci ); // Write to all individual files for each sample and each value. for( size_t i = 0; i < processor.calculators().size(); ++i ) { diff --git a/src/commands/analyze/fst.cpp b/src/commands/analyze/fst.cpp index 586dc75..f8a458e 100644 --- a/src/commands/analyze/fst.cpp +++ b/src/commands/analyze/fst.cpp @@ -102,7 +102,9 @@ void setup_fst( CLI::App& app ) // ------------------------------------------------------------------------- // Fst Processor - options->fst_processor.add_fst_processor_opts_to_app( sub ); + options->fst_processor.add_fst_processor_opts_to_app( + sub, options->variant_input.get_variant_reference_genome_options() + ); // Settings: Write pi tables options->write_pi_tables.option = sub->add_flag( @@ -171,6 +173,10 @@ void setup_fst( CLI::App& app ) */ struct FstCommandState { + // Provided loci, if needed for window averaging + std::shared_ptr provided_loci; + + // Method for computing FST FstProcessorOptions::FstMethod method; // The output to write to, for per-window output. @@ -523,7 +529,7 @@ void write_to_output_files_( FstCommandState& state ) { // Get the results and check them. - auto const& window_fst = processor.get_result( window, nullptr ); // TODO provided_loci + auto const& window_fst = processor.get_result( window, state.provided_loci ); internal_check( window_fst.size() == sample_pairs.size(), "Inconsistent size of window fst values and sample pairs." @@ -540,7 +546,7 @@ void write_to_output_files_( ); // Get the values to be printed here - pi_vectors = &processor.get_pi_vectors( window, nullptr ); // TODO provided_loci + pi_vectors = &processor.get_pi_vectors( window, state.provided_loci ); internal_check( std::get<0>(*pi_vectors).size() == sample_pairs.size() ); internal_check( std::get<1>(*pi_vectors).size() == sample_pairs.size() ); internal_check( std::get<2>(*pi_vectors).size() == sample_pairs.size() ); @@ -616,6 +622,7 @@ void run_fst( FstOptions const& options ) // The state POD that stores the run objects in one place. FstCommandState state; + state.provided_loci = options.fst_processor.get_provided_loci(); state.method = options.fst_processor.get_fst_method(); state.is_all_to_all = options.fst_processor.is_all_to_all(); state.is_whole_genome = options.window.window_type() == WindowOptions::WindowType::kGenome; diff --git a/src/commands/analyze/fst_cathedral.cpp b/src/commands/analyze/fst_cathedral.cpp index 2719c04..883e693 100644 --- a/src/commands/analyze/fst_cathedral.cpp +++ b/src/commands/analyze/fst_cathedral.cpp @@ -117,7 +117,9 @@ void setup_fst_cathedral( CLI::App& app ) fst_processor_params.with_all_methods = false; fst_processor_params.with_window_average_policy = false; fst_processor_params.fix_window_average_policy = genesis::population::WindowAveragePolicy::kSum; - options->fst_processor.add_fst_processor_opts_to_app( sub, fst_processor_params ); + options->fst_processor.add_fst_processor_opts_to_app( + sub, options->variant_input.get_variant_reference_genome_options(), fst_processor_params + ); // Width options->cathedral_width.option = sub->add_option( diff --git a/src/options/diversity_processor.cpp b/src/options/diversity_processor.cpp index 0ce8880..3dcd8c2 100644 --- a/src/options/diversity_processor.cpp +++ b/src/options/diversity_processor.cpp @@ -72,6 +72,7 @@ tajima_d_denominator_policy_map = { void DiversityProcessorOptions::add_diversity_processor_opts_to_app( CLI::App* sub, + VariantReferenceGenomeOptions const& ref_genome_opts, std::string const& group ) { @@ -80,8 +81,8 @@ void DiversityProcessorOptions::add_diversity_processor_opts_to_app( // ------------------------------------------------------------------------- // Pool sizes and window averaging + window_average_policy.add_window_average_opt_to_app( sub, ref_genome_opts ); poolsizes.add_poolsizes_opt_to_app( sub, true, group ); - window_average_policy.add_window_average_opt_to_app( sub, group ); // Settings: Tajima Denominator Policy tajima_d_denominator_policy.option = sub->add_option( diff --git a/src/options/diversity_processor.hpp b/src/options/diversity_processor.hpp index 5bbc26c..d2ae420 100644 --- a/src/options/diversity_processor.hpp +++ b/src/options/diversity_processor.hpp @@ -26,6 +26,7 @@ #include "CLI/CLI.hpp" +#include "options/variant_reference_genome.hpp" #include "options/poolsizes.hpp" #include "options/window_average.hpp" #include "tools/cli_option.hpp" @@ -33,8 +34,10 @@ #include "genesis/population/filter/sample_counts_filter_numerical.hpp" #include "genesis/population/filter/sample_counts_filter.hpp" #include "genesis/population/function/diversity_pool_processor.hpp" +#include "genesis/population/genome_locus_set.hpp" #include +#include #include #include #include @@ -73,6 +76,7 @@ class DiversityProcessorOptions void add_diversity_processor_opts_to_app( CLI::App* sub, + VariantReferenceGenomeOptions const& ref_genome_opts, std::string const& group = "Settings" ); @@ -100,6 +104,11 @@ class DiversityProcessorOptions return ! no_tajima_d.value; } + std::shared_ptr get_provided_loci() const + { + return window_average_policy.get_provided_loci(); + } + // ------------------------------------------------------------------------- // Option Members // ------------------------------------------------------------------------- @@ -113,8 +122,8 @@ class DiversityProcessorOptions CliOption no_tajima_d = false; // General settings - PoolsizesOptions poolsizes; WindowAverageOptions window_average_policy; + PoolsizesOptions poolsizes; // Remnant settings. These are simply re-used from the numerical filters now. // Kept here for reference, just in case they are needed for later. diff --git a/src/options/fst_processor.cpp b/src/options/fst_processor.cpp index 6186948..6a7031c 100644 --- a/src/options/fst_processor.cpp +++ b/src/options/fst_processor.cpp @@ -71,6 +71,7 @@ std::vector> const fst_me void FstProcessorOptions::add_fst_processor_opts_to_app( CLI::App* sub, + VariantReferenceGenomeOptions const& ref_genome_opts, Params const& fst_processor_params, std::string const& group ) { @@ -79,6 +80,13 @@ void FstProcessorOptions::add_fst_processor_opts_to_app( // Basic Fst Settings // ------------------------------------------------------------------------- + // Window averaging + if( fst_processor_params.with_window_average_policy ) { + window_average_policy.add_window_average_opt_to_app( + sub, ref_genome_opts, !fst_processor_params.with_all_methods + ); + } + // Settings: FST Method if( fst_processor_params.with_all_methods ) { method.option = sub->add_option( @@ -111,10 +119,7 @@ void FstProcessorOptions::add_fst_processor_opts_to_app( // which do need pool sizes, so then we require this to be provided. poolsizes.add_poolsizes_opt_to_app( sub, !fst_processor_params.with_all_methods, group ); - // Window averaging - if( fst_processor_params.with_window_average_policy ) { - window_average_policy.add_window_average_opt_to_app( sub, group ); - } + // Store for later params = fst_processor_params; // ------------------------------------------------------------------------- @@ -337,7 +342,7 @@ genesis::population::FstPoolProcessor FstProcessorOptions::get_fst_pool_processo using namespace genesis::population; // Get the method that we want to use. - auto const method = get_fst_method(); + auto const method_value = get_fst_method(); // Get all sample indices that we are actually interested in. // We do this so that pool sizes for samples that we ignore anyway do not need to be given. @@ -350,9 +355,9 @@ genesis::population::FstPoolProcessor FstProcessorOptions::get_fst_pool_processo // Get the pool sizes for all samples that we are interested in. auto const needs_pool_sizes = ( - method == FstMethod::kUnbiasedNei || - method == FstMethod::kUnbiasedHudson || - method == FstMethod::kKofler + method_value == FstMethod::kUnbiasedNei || + method_value == FstMethod::kUnbiasedHudson || + method_value == FstMethod::kKofler ); auto const pool_sizes = ( needs_pool_sizes @@ -360,6 +365,47 @@ genesis::population::FstPoolProcessor FstProcessorOptions::get_fst_pool_processo : std::vector( sample_names.size(), 0 ) ); + // Check the window averaging setup + auto const needs_window_average_policy = ( + method_value == FstMethod::kUnbiasedNei || + method_value == FstMethod::kUnbiasedHudson + ); + switch( method_value ) { + case FstMethod::kUnbiasedNei: + case FstMethod::kUnbiasedHudson: { + if( + ! window_average_policy.get_window_average_policy_option().option || + !*window_average_policy.get_window_average_policy_option().option + ) { + throw CLI::ValidationError( + method.option->get_name() + ", " + + window_average_policy.get_window_average_policy_option().option->get_name(), + "Window average policy option needs to be provided with " + + method.option->get_name() + " " + method.value + ); + } + break; + } + case FstMethod::kKofler: + case FstMethod::kKarlsson: { + if( + window_average_policy.get_window_average_policy_option().option && + *window_average_policy.get_window_average_policy_option().option + ) { + throw CLI::ValidationError( + method.option->get_name() + ", " + + window_average_policy.get_window_average_policy_option().option->get_name(), + "Window average policy option cannot be used with " + + method.option->get_name() + " " + method.value + ); + } + break; + } + default: { + throw std::domain_error( "Internal error: Invalid FST method." ); + } + } + // For our unbiased and for Kofler, check that we got the right number of pool sizes. internal_check( ! needs_pool_sizes || pool_sizes.size() == sample_names.size(), @@ -368,14 +414,14 @@ genesis::population::FstPoolProcessor FstProcessorOptions::get_fst_pool_processo // Get the window average policy to use for all processors. auto const win_avg_policy = ( - params.with_window_average_policy + params.with_window_average_policy && needs_window_average_policy ? window_average_policy.get_window_average_policy() : params.fix_window_average_policy ); // Make the type of processor that we need for the provided method. FstPoolProcessor processor; - switch( method ) { + switch( method_value ) { case FstMethod::kUnbiasedNei: { processor = make_fst_pool_processor( sample_pairs, pool_sizes, win_avg_policy, diff --git a/src/options/fst_processor.hpp b/src/options/fst_processor.hpp index 0005c8d..5d3c9b5 100644 --- a/src/options/fst_processor.hpp +++ b/src/options/fst_processor.hpp @@ -26,13 +26,16 @@ #include "CLI/CLI.hpp" +#include "options/variant_reference_genome.hpp" #include "options/poolsizes.hpp" #include "options/window_average.hpp" #include "tools/cli_option.hpp" #include "genesis/population/function/fst_pool_processor.hpp" #include "genesis/population/function/window_average.hpp" +#include "genesis/population/genome_locus_set.hpp" +#include #include #include #include @@ -95,16 +98,18 @@ class FstProcessorOptions void add_fst_processor_opts_to_app( CLI::App* sub, + VariantReferenceGenomeOptions const& ref_genome_opts, std::string const& group = "Settings" ) { // Yet again we need an overload due to the stupid compiler bug about // default constructed arguments, see https://stackoverflow.com/q/43819314/4184258 Params params; - return add_fst_processor_opts_to_app( sub, params, group ); + return add_fst_processor_opts_to_app( sub, ref_genome_opts, params, group ); } void add_fst_processor_opts_to_app( CLI::App* sub, + VariantReferenceGenomeOptions const& ref_genome_opts, Params const& fst_processor_params, std::string const& group = "Settings" ); @@ -126,15 +131,20 @@ class FstProcessorOptions std::vector> const& sample_pairs ) const; + std::shared_ptr get_provided_loci() const + { + return window_average_policy.get_provided_loci(); + } + // ------------------------------------------------------------------------- // Option Members // ------------------------------------------------------------------------- private: + WindowAverageOptions window_average_policy; CliOption method = "unbiased-nei"; PoolsizesOptions poolsizes; - WindowAverageOptions window_average_policy; Params params; CliOption comparand = ""; diff --git a/src/options/variant_filter_mask.cpp b/src/options/variant_filter_mask.cpp index 32a87b8..4d48165 100644 --- a/src/options/variant_filter_mask.cpp +++ b/src/options/variant_filter_mask.cpp @@ -33,7 +33,6 @@ #include "genesis/population/format/vcf_input_stream.hpp" #include "genesis/population/function/functions.hpp" #include "genesis/population/function/genome_locus_set.hpp" -#include "genesis/population/function/genome_locus_set.hpp" #include "genesis/population/function/genome_region.hpp" #include "genesis/population/genome_region.hpp" #include "genesis/sequence/functions/dict.hpp" diff --git a/src/options/variant_input.hpp b/src/options/variant_input.hpp index b8986e4..3183ced 100644 --- a/src/options/variant_input.hpp +++ b/src/options/variant_input.hpp @@ -193,6 +193,14 @@ class VariantInputOptions // Run Functions // ------------------------------------------------------------------------- + /** + * @brief Return the VariantReferenceGenomeOptions used for the input + */ + VariantReferenceGenomeOptions const& get_variant_reference_genome_options() const + { + return reference_genome_options_; + } + /** * @brief Return the pointer to the reference genome, if provided, or nullptr. */ diff --git a/src/options/window_average.cpp b/src/options/window_average.cpp index 0ef87d0..3d31865 100644 --- a/src/options/window_average.cpp +++ b/src/options/window_average.cpp @@ -26,6 +26,14 @@ #include "options/global.hpp" #include "tools/misc.hpp" +#include "genesis/utils/text/string.hpp" +#include "genesis/population/format/bed_reader.hpp" +#include "genesis/population/function/functions.hpp" +#include "genesis/population/function/genome_locus_set.hpp" +#include "genesis/sequence/functions/dict.hpp" +#include "genesis/utils/core/fs.hpp" +#include "genesis/utils/io/input_source.hpp" +#include "genesis/utils/text/convert.hpp" #include "genesis/utils/text/string.hpp" #include @@ -42,7 +50,8 @@ std::vector> const window_average_po { "available-loci", WindowAveragePolicy::kAvailableLoci }, { "valid-loci", WindowAveragePolicy::kValidLoci }, { "valid-snps", WindowAveragePolicy::kValidSnps }, - { "sum", WindowAveragePolicy::kSum } + { "sum", WindowAveragePolicy::kSum }, + { "provided-loci", WindowAveragePolicy::kProvidedLoci } }; // ================================================================================================= @@ -51,6 +60,8 @@ std::vector> const window_average_po CLI::Option* WindowAverageOptions::add_window_average_opt_to_app( CLI::App* sub, + VariantReferenceGenomeOptions const& ref_genome_opts, + bool required, std::string const& group ) { // Correct setup check. @@ -58,6 +69,15 @@ CLI::Option* WindowAverageOptions::add_window_average_opt_to_app( window_average_policy_.option == nullptr, "Cannot use the same WindowAverageOptions object multiple times." ); + internal_check( + !ref_genome_opts_ || ref_genome_opts_ == &ref_genome_opts, + "Cannot use the same VariantFilterMaskOptions with different VariantReferenceGenomeOptions" + ); + ref_genome_opts_ = &ref_genome_opts; + + // ------------------------------------------------------------------------- + // Policy + // ------------------------------------------------------------------------- // Add the option. We need quite the bit of documentation here... window_average_policy_.option = sub->add_option( @@ -75,12 +95,95 @@ CLI::Option* WindowAverageOptions::add_window_average_opt_to_app( "but can be useful when the data only consists of SNPs." "\n(5) `sum`: Simply report the sum of the per-site values, with no averaging " "applied to it. This can be used to apply custom averaging later." + "\n(6) `provided-loci`: Use the exact loci provided via `--window-average-loci-bed` or " + "`--window-average-loci-fasta` to determine the denominator for the window averaging, " + "by counting all positions set in this mask in the given window." ); window_average_policy_.option->transform( CLI::IsMember( enum_map_keys( window_average_policy_map ), CLI::ignore_case ) ); window_average_policy_.option->group( group ); - window_average_policy_.option->required(); + if( required ) { + window_average_policy_.option->required(); + } + + // ------------------------------------------------------------------------- + // Provided Loci BED + // ------------------------------------------------------------------------- + + // Add option for provided loci by BED file. + window_average_loci_bed_.option = sub->add_option( + "--window-average-loci-bed", + window_average_loci_bed_.value, + "Genomic positions to use for `--window-average-policy provided-loci`, as a BED file. " + "The regions listed in the BED file are counted towards the window average denominator." + "\nThis only uses the chromosome, as well as start and end information per line, and " + "ignores everything else in the file. Note that BED uses 0-based positions, and a half-open " + "`[)` interval for the end position; simply using columns extracted from other file formats " + "(such as vcf or gff) will not work." + + ); + window_average_loci_bed_.option->check( CLI::ExistingFile ); + window_average_loci_bed_.option->group( group ); + window_average_loci_bed_.option->needs( window_average_policy_.option ); + + // Add inversion option for above mask option. + window_average_loci_bed_inv_.option = sub->add_flag( + "--window-average-loci-bed-invert", + window_average_loci_bed_inv_.value, + "When using `--window-average-loci-bed`, invert the set of loci. " + "When this flag is set, the loci that are not set are used for the denominator. " + "Needs one of " + ref_genome_opts_->get_reference_option_names() + + " to determine chromosome lengths." + ); + window_average_loci_bed_inv_.option->group( group ); + window_average_loci_bed_inv_.option->needs( window_average_loci_bed_.option ); + + // ------------------------------------------------------------------------- + // Provided Loci FASTA + // ------------------------------------------------------------------------- + + // Add option for provided loci by fasta-style mask file. + window_average_loci_fasta_.option = sub->add_option( + "--window-average-loci-fasta", + window_average_loci_fasta_.value, + "Genomic positions to use for `--window-average-policy provided-loci`, as a FASTA-like mask " + "file (such as used by vcftools).\n" + "The file contains a sequence of integer digits `[0-9]`, one for each position on the " + "chromosomes, which specify if the position should be counted towards the window denominator. " + "Any positions with digits above the `--window-average-loci-fasta-min` value are used. " + ); + window_average_loci_fasta_.option->check( CLI::ExistingFile ); + window_average_loci_fasta_.option->group( group ); + window_average_loci_fasta_.option->needs( window_average_policy_.option ); + + // Add min threshold option for above mask option. + window_average_loci_fasta_min_.option = sub->add_option( + "--window-average-loci-fasta-min", + window_average_loci_fasta_min_.value, + "When using `--window-average-loci-fasta`, set the cutoff threshold for the counted digits. " + "All positions above that value are counted. The default is 0, meaning that only exactly " + "the positons with value 0 will not be counted." + ); + window_average_loci_fasta_min_.option->check(CLI::Range(0,9)); + window_average_loci_fasta_min_.option->group( group ); + window_average_loci_fasta_min_.option->needs( window_average_loci_fasta_.option ); + + // Add inversion option for above mask option. + window_average_loci_fasta_inv_.option = sub->add_flag( + "--window-average-loci-fasta-invert", + window_average_loci_fasta_inv_.value, + "When using `--window-average-loci-fasta`, invert the set of loci. " + "When it is set, all positions in the FASTA-like file below or equal to the threshold " + "are counted towards the window average denominator." + ); + window_average_loci_fasta_inv_.option->group( group ); + window_average_loci_fasta_inv_.option->needs( window_average_loci_fasta_.option ); + + // We only want to be able to specify a single mask file, as everything else would lead to chaos. + window_average_loci_bed_.option->excludes( window_average_loci_fasta_.option ); + window_average_loci_fasta_.option->excludes( window_average_loci_bed_.option ); + return window_average_policy_.option; } @@ -90,8 +193,93 @@ CLI::Option* WindowAverageOptions::add_window_average_opt_to_app( genesis::population::WindowAveragePolicy WindowAverageOptions::get_window_average_policy() const { - return get_enum_map_value( + // Use the enum map to get the value we want + auto const result = get_enum_map_value( window_average_policy_map, window_average_policy_.value ); + + // User error checks when option not provided. + // We do not always need it, but when requested here, we need to get it. + if( !*window_average_policy_.option ) { + throw CLI::ValidationError( + window_average_policy_.option->get_name(), + "Window average policy is required to be set for this combination of options" + ); + } + + // User error checks for provided loci + if( result == WindowAveragePolicy::kProvidedLoci ) { + if( !*window_average_loci_bed_.option && !*window_average_loci_fasta_.option ) { + throw CLI::ValidationError( + window_average_policy_.option->get_name(), + "Window average policy `provided-loci` is set, but no loci file was provided" + ); + } + } else { + if( *window_average_loci_bed_.option ) { + throw CLI::ValidationError( + window_average_policy_.option->get_name() + ", " + + window_average_loci_bed_.option->get_name(), + "Provided loci BED file is given, but window average policy `provided-loci` is not set" + ); + } + if( *window_average_loci_fasta_.option ) { + throw CLI::ValidationError( + window_average_policy_.option->get_name() + ", " + + window_average_loci_fasta_.option->get_name(), + "Provided loci FASTA file is given, but window average policy `provided-loci` is not set" + ); + } + } + + return result; +} + +void WindowAverageOptions::prepare_provided_loci_() const +{ + using namespace genesis; + using namespace genesis::population; + using namespace genesis::sequence; + using namespace genesis::utils; + internal_check( + ref_genome_opts_, "WindowAverageOptions needs VariantReferenceGenomeOptions" + ); + + // Not running again if we already have set up a filter (i.e., if the shared pointer has data). + if( provided_loci_ ) { + return; + } + + // Add the provided loci from a bed file. + if( *window_average_loci_bed_.option ) { + LOG_MSG2 << "Reading provided loci BED file " << window_average_loci_bed_.value; + provided_loci_ = std::make_shared( + BedReader().read_as_genome_locus_set( from_file( window_average_loci_bed_.value )) + ); + if( window_average_loci_bed_inv_.value ) { + auto ref_dict = ref_genome_opts_->get_reference_dict(); + if( !ref_dict ) { + throw CLI::ValidationError( + window_average_loci_bed_inv_.option->get_name(), + "Cannot invert BED provided loci without one of " + + ref_genome_opts_->get_reference_option_names() + + " being provided to determine the chromosome lengths" + ); + } + provided_loci_->invert( *ref_dict ); + } + } + + // Add the provided loci from a fasta file. + if( *window_average_loci_fasta_.option ) { + LOG_MSG2 << "Reading provided loci FASTA file " << window_average_loci_fasta_.value; + provided_loci_ = std::make_shared( + read_mask_fasta( + from_file( window_average_loci_fasta_.value ), + window_average_loci_fasta_min_.value, + window_average_loci_fasta_inv_.value + ) + ); + } } diff --git a/src/options/window_average.hpp b/src/options/window_average.hpp index 6b331bd..53cc479 100644 --- a/src/options/window_average.hpp +++ b/src/options/window_average.hpp @@ -26,10 +26,14 @@ #include "CLI/CLI.hpp" +#include "options/variant_reference_genome.hpp" #include "tools/cli_option.hpp" #include "genesis/population/function/window_average.hpp" +#include "genesis/population/genome_locus_set.hpp" +#include "genesis/sequence/sequence_dict.hpp" +#include #include #include @@ -66,7 +70,9 @@ class WindowAverageOptions CLI::Option* add_window_average_opt_to_app( CLI::App* sub, - std::string const& group = "Settings" + VariantReferenceGenomeOptions const& ref_genome_opts, + bool required = true, + std::string const& group = "Window Averaging" ); // ------------------------------------------------------------------------- @@ -78,6 +84,24 @@ class WindowAverageOptions */ genesis::population::WindowAveragePolicy get_window_average_policy() const; + /** + * @brief Get the user-provided loci for window averaging, or nullptr if not provided. + */ + std::shared_ptr get_provided_loci() const + { + prepare_provided_loci_(); + return provided_loci_; + } + + CliOption const& get_window_average_policy_option() const + { + return window_average_policy_; + } + +private: + + void prepare_provided_loci_() const; + // ------------------------------------------------------------------------- // Option Members // ------------------------------------------------------------------------- @@ -85,6 +109,16 @@ class WindowAverageOptions private: CliOption window_average_policy_; + CliOption window_average_loci_bed_; + CliOption window_average_loci_bed_inv_ = false; + CliOption window_average_loci_fasta_; + CliOption window_average_loci_fasta_min_ = 0; + CliOption window_average_loci_fasta_inv_ = false; + + // We need a reference dict, for inverting bed files + VariantReferenceGenomeOptions const* ref_genome_opts_ = nullptr; + + mutable std::shared_ptr provided_loci_; };