Skip to content

Commit

Permalink
Add window averaging with a provided mask #24
Browse files Browse the repository at this point in the history
  • Loading branch information
lczech committed Aug 1, 2024
1 parent a9d1cbb commit 079c128
Show file tree
Hide file tree
Showing 11 changed files with 338 additions and 26 deletions.
14 changes: 11 additions & 3 deletions src/commands/analyze/diversity.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,9 @@ void setup_diversity( CLI::App& app )
// -------------------------------------------------------------------------

// Diversity Processor
options->diversity_processor.add_diversity_processor_opts_to_app( sub );
options->diversity_processor.add_diversity_processor_opts_to_app(
sub, options->variant_input.get_variant_reference_genome_options()
);

// Settings: Omit Empty Windows
options->no_extra_columns.option = sub->add_flag(
Expand Down Expand Up @@ -162,6 +164,9 @@ void setup_diversity( CLI::App& app )
*/
struct DiversityCommandState
{
// Provided loci, if needed for window averaging
std::shared_ptr<genesis::population::GenomeLocusSet> provided_loci;

// Which values are actually being computed?
bool compute_theta_pi;
bool compute_theta_wa;
Expand Down Expand Up @@ -193,6 +198,9 @@ DiversityCommandState prepare_output_data_(
) {
DiversityCommandState output;

// Provided loci, if needed for window averaging
output.provided_loci = options.diversity_processor.get_provided_loci();

// Get the measures to compute. At least one of them will be active.
output.compute_theta_pi = options.diversity_processor.compute_theta_pi();
output.compute_theta_wa = options.diversity_processor.compute_theta_watterson();
Expand Down Expand Up @@ -382,7 +390,7 @@ void write_output_popoolation_line_(
};

// Get the results for the whole set of calculators.
auto const results = processor.get_result( window, nullptr ); // TODO provided_loci
auto const results = processor.get_result( window, output.provided_loci );
auto const total_stats = variant_filter_stats_category_counts(
processor.get_filter_stats()
);
Expand Down Expand Up @@ -479,7 +487,7 @@ void write_output_table_line_(
}

// Get the results for the whole set of calculators.
auto const results = processor.get_result( window, nullptr ); // TODO provided_loci
auto const results = processor.get_result( window, output.provided_loci );

// Write to all individual files for each sample and each value.
for( size_t i = 0; i < processor.calculators().size(); ++i ) {
Expand Down
13 changes: 10 additions & 3 deletions src/commands/analyze/fst.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,9 @@ void setup_fst( CLI::App& app )
// -------------------------------------------------------------------------

// Fst Processor
options->fst_processor.add_fst_processor_opts_to_app( sub );
options->fst_processor.add_fst_processor_opts_to_app(
sub, options->variant_input.get_variant_reference_genome_options()
);

// Settings: Write pi tables
options->write_pi_tables.option = sub->add_flag(
Expand Down Expand Up @@ -171,6 +173,10 @@ void setup_fst( CLI::App& app )
*/
struct FstCommandState
{
// Provided loci, if needed for window averaging
std::shared_ptr<genesis::population::GenomeLocusSet> provided_loci;

// Method for computing FST
FstProcessorOptions::FstMethod method;

// The output to write to, for per-window output.
Expand Down Expand Up @@ -523,7 +529,7 @@ void write_to_output_files_(
FstCommandState& state
) {
// Get the results and check them.
auto const& window_fst = processor.get_result( window, nullptr ); // TODO provided_loci
auto const& window_fst = processor.get_result( window, state.provided_loci );
internal_check(
window_fst.size() == sample_pairs.size(),
"Inconsistent size of window fst values and sample pairs."
Expand All @@ -540,7 +546,7 @@ void write_to_output_files_(
);

// Get the values to be printed here
pi_vectors = &processor.get_pi_vectors( window, nullptr ); // TODO provided_loci
pi_vectors = &processor.get_pi_vectors( window, state.provided_loci );
internal_check( std::get<0>(*pi_vectors).size() == sample_pairs.size() );
internal_check( std::get<1>(*pi_vectors).size() == sample_pairs.size() );
internal_check( std::get<2>(*pi_vectors).size() == sample_pairs.size() );
Expand Down Expand Up @@ -616,6 +622,7 @@ void run_fst( FstOptions const& options )

// The state POD that stores the run objects in one place.
FstCommandState state;
state.provided_loci = options.fst_processor.get_provided_loci();
state.method = options.fst_processor.get_fst_method();
state.is_all_to_all = options.fst_processor.is_all_to_all();
state.is_whole_genome = options.window.window_type() == WindowOptions::WindowType::kGenome;
Expand Down
4 changes: 3 additions & 1 deletion src/commands/analyze/fst_cathedral.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,9 @@ void setup_fst_cathedral( CLI::App& app )
fst_processor_params.with_all_methods = false;
fst_processor_params.with_window_average_policy = false;
fst_processor_params.fix_window_average_policy = genesis::population::WindowAveragePolicy::kSum;
options->fst_processor.add_fst_processor_opts_to_app( sub, fst_processor_params );
options->fst_processor.add_fst_processor_opts_to_app(
sub, options->variant_input.get_variant_reference_genome_options(), fst_processor_params
);

// Width
options->cathedral_width.option = sub->add_option(
Expand Down
3 changes: 2 additions & 1 deletion src/options/diversity_processor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ tajima_d_denominator_policy_map = {

void DiversityProcessorOptions::add_diversity_processor_opts_to_app(
CLI::App* sub,
VariantReferenceGenomeOptions const& ref_genome_opts,
std::string const& group
) {

Expand All @@ -80,8 +81,8 @@ void DiversityProcessorOptions::add_diversity_processor_opts_to_app(
// -------------------------------------------------------------------------

// Pool sizes and window averaging
window_average_policy.add_window_average_opt_to_app( sub, ref_genome_opts );
poolsizes.add_poolsizes_opt_to_app( sub, true, group );
window_average_policy.add_window_average_opt_to_app( sub, group );

// Settings: Tajima Denominator Policy
tajima_d_denominator_policy.option = sub->add_option(
Expand Down
11 changes: 10 additions & 1 deletion src/options/diversity_processor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,15 +26,18 @@

#include "CLI/CLI.hpp"

#include "options/variant_reference_genome.hpp"
#include "options/poolsizes.hpp"
#include "options/window_average.hpp"
#include "tools/cli_option.hpp"

#include "genesis/population/filter/sample_counts_filter_numerical.hpp"
#include "genesis/population/filter/sample_counts_filter.hpp"
#include "genesis/population/function/diversity_pool_processor.hpp"
#include "genesis/population/genome_locus_set.hpp"

#include <string>
#include <memory>
#include <utility>
#include <tuple>
#include <vector>
Expand Down Expand Up @@ -73,6 +76,7 @@ class DiversityProcessorOptions

void add_diversity_processor_opts_to_app(
CLI::App* sub,
VariantReferenceGenomeOptions const& ref_genome_opts,
std::string const& group = "Settings"
);

Expand Down Expand Up @@ -100,6 +104,11 @@ class DiversityProcessorOptions
return ! no_tajima_d.value;
}

std::shared_ptr<genesis::population::GenomeLocusSet> get_provided_loci() const
{
return window_average_policy.get_provided_loci();
}

// -------------------------------------------------------------------------
// Option Members
// -------------------------------------------------------------------------
Expand All @@ -113,8 +122,8 @@ class DiversityProcessorOptions
CliOption<bool> no_tajima_d = false;

// General settings
PoolsizesOptions poolsizes;
WindowAverageOptions window_average_policy;
PoolsizesOptions poolsizes;

// Remnant settings. These are simply re-used from the numerical filters now.
// Kept here for reference, just in case they are needed for later.
Expand Down
66 changes: 56 additions & 10 deletions src/options/fst_processor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ std::vector<std::pair<std::string, FstProcessorOptions::FstMethod>> const fst_me

void FstProcessorOptions::add_fst_processor_opts_to_app(
CLI::App* sub,
VariantReferenceGenomeOptions const& ref_genome_opts,
Params const& fst_processor_params,
std::string const& group
) {
Expand All @@ -79,6 +80,13 @@ void FstProcessorOptions::add_fst_processor_opts_to_app(
// Basic Fst Settings
// -------------------------------------------------------------------------

// Window averaging
if( fst_processor_params.with_window_average_policy ) {
window_average_policy.add_window_average_opt_to_app(
sub, ref_genome_opts, !fst_processor_params.with_all_methods
);
}

// Settings: FST Method
if( fst_processor_params.with_all_methods ) {
method.option = sub->add_option(
Expand Down Expand Up @@ -111,10 +119,7 @@ void FstProcessorOptions::add_fst_processor_opts_to_app(
// which do need pool sizes, so then we require this to be provided.
poolsizes.add_poolsizes_opt_to_app( sub, !fst_processor_params.with_all_methods, group );

// Window averaging
if( fst_processor_params.with_window_average_policy ) {
window_average_policy.add_window_average_opt_to_app( sub, group );
}
// Store for later
params = fst_processor_params;

// -------------------------------------------------------------------------
Expand Down Expand Up @@ -337,7 +342,7 @@ genesis::population::FstPoolProcessor FstProcessorOptions::get_fst_pool_processo
using namespace genesis::population;

// Get the method that we want to use.
auto const method = get_fst_method();
auto const method_value = get_fst_method();

// Get all sample indices that we are actually interested in.
// We do this so that pool sizes for samples that we ignore anyway do not need to be given.
Expand All @@ -350,16 +355,57 @@ genesis::population::FstPoolProcessor FstProcessorOptions::get_fst_pool_processo

// Get the pool sizes for all samples that we are interested in.
auto const needs_pool_sizes = (
method == FstMethod::kUnbiasedNei ||
method == FstMethod::kUnbiasedHudson ||
method == FstMethod::kKofler
method_value == FstMethod::kUnbiasedNei ||
method_value == FstMethod::kUnbiasedHudson ||
method_value == FstMethod::kKofler
);
auto const pool_sizes = (
needs_pool_sizes
? poolsizes.get_pool_sizes( sample_names, used_samples )
: std::vector<size_t>( sample_names.size(), 0 )
);

// Check the window averaging setup
auto const needs_window_average_policy = (
method_value == FstMethod::kUnbiasedNei ||
method_value == FstMethod::kUnbiasedHudson
);
switch( method_value ) {
case FstMethod::kUnbiasedNei:
case FstMethod::kUnbiasedHudson: {
if(
! window_average_policy.get_window_average_policy_option().option ||
!*window_average_policy.get_window_average_policy_option().option
) {
throw CLI::ValidationError(
method.option->get_name() + ", " +
window_average_policy.get_window_average_policy_option().option->get_name(),
"Window average policy option needs to be provided with " +
method.option->get_name() + " " + method.value
);
}
break;
}
case FstMethod::kKofler:
case FstMethod::kKarlsson: {
if(
window_average_policy.get_window_average_policy_option().option &&
*window_average_policy.get_window_average_policy_option().option
) {
throw CLI::ValidationError(
method.option->get_name() + ", " +
window_average_policy.get_window_average_policy_option().option->get_name(),
"Window average policy option cannot be used with " +
method.option->get_name() + " " + method.value
);
}
break;
}
default: {
throw std::domain_error( "Internal error: Invalid FST method." );
}
}

// For our unbiased and for Kofler, check that we got the right number of pool sizes.
internal_check(
! needs_pool_sizes || pool_sizes.size() == sample_names.size(),
Expand All @@ -368,14 +414,14 @@ genesis::population::FstPoolProcessor FstProcessorOptions::get_fst_pool_processo

// Get the window average policy to use for all processors.
auto const win_avg_policy = (
params.with_window_average_policy
params.with_window_average_policy && needs_window_average_policy
? window_average_policy.get_window_average_policy()
: params.fix_window_average_policy
);

// Make the type of processor that we need for the provided method.
FstPoolProcessor processor;
switch( method ) {
switch( method_value ) {
case FstMethod::kUnbiasedNei: {
processor = make_fst_pool_processor<FstPoolCalculatorUnbiased>(
sample_pairs, pool_sizes, win_avg_policy,
Expand Down
14 changes: 12 additions & 2 deletions src/options/fst_processor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,16 @@

#include "CLI/CLI.hpp"

#include "options/variant_reference_genome.hpp"
#include "options/poolsizes.hpp"
#include "options/window_average.hpp"
#include "tools/cli_option.hpp"

#include "genesis/population/function/fst_pool_processor.hpp"
#include "genesis/population/function/window_average.hpp"
#include "genesis/population/genome_locus_set.hpp"

#include <memory>
#include <string>
#include <utility>
#include <tuple>
Expand Down Expand Up @@ -95,16 +98,18 @@ class FstProcessorOptions

void add_fst_processor_opts_to_app(
CLI::App* sub,
VariantReferenceGenomeOptions const& ref_genome_opts,
std::string const& group = "Settings"
) {
// Yet again we need an overload due to the stupid compiler bug about
// default constructed arguments, see https://stackoverflow.com/q/43819314/4184258
Params params;
return add_fst_processor_opts_to_app( sub, params, group );
return add_fst_processor_opts_to_app( sub, ref_genome_opts, params, group );
}

void add_fst_processor_opts_to_app(
CLI::App* sub,
VariantReferenceGenomeOptions const& ref_genome_opts,
Params const& fst_processor_params,
std::string const& group = "Settings"
);
Expand All @@ -126,15 +131,20 @@ class FstProcessorOptions
std::vector<std::pair<size_t, size_t>> const& sample_pairs
) const;

std::shared_ptr<genesis::population::GenomeLocusSet> get_provided_loci() const
{
return window_average_policy.get_provided_loci();
}

// -------------------------------------------------------------------------
// Option Members
// -------------------------------------------------------------------------

private:

WindowAverageOptions window_average_policy;
CliOption<std::string> method = "unbiased-nei";
PoolsizesOptions poolsizes;
WindowAverageOptions window_average_policy;
Params params;

CliOption<std::string> comparand = "";
Expand Down
1 change: 0 additions & 1 deletion src/options/variant_filter_mask.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@
#include "genesis/population/format/vcf_input_stream.hpp"
#include "genesis/population/function/functions.hpp"
#include "genesis/population/function/genome_locus_set.hpp"
#include "genesis/population/function/genome_locus_set.hpp"
#include "genesis/population/function/genome_region.hpp"
#include "genesis/population/genome_region.hpp"
#include "genesis/sequence/functions/dict.hpp"
Expand Down
8 changes: 8 additions & 0 deletions src/options/variant_input.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,14 @@ class VariantInputOptions
// Run Functions
// -------------------------------------------------------------------------

/**
* @brief Return the VariantReferenceGenomeOptions used for the input
*/
VariantReferenceGenomeOptions const& get_variant_reference_genome_options() const
{
return reference_genome_options_;
}

/**
* @brief Return the pointer to the reference genome, if provided, or nullptr.
*/
Expand Down
Loading

0 comments on commit 079c128

Please sign in to comment.