Skip to content

Commit

Permalink
Refine subsampling option and functions
Browse files Browse the repository at this point in the history
  • Loading branch information
lczech committed Mar 13, 2024
1 parent 4a72e31 commit 57b8f67
Show file tree
Hide file tree
Showing 4 changed files with 13 additions and 7 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ include( "${CMAKE_CURRENT_LIST_DIR}/tools/cmake/DownloadDependency.cmake" )
# These are replaced by tools/cmake/update_dependencies.sh to the hashes that are currently checked out.
# Thus, do not replace the hashes manually!
SET( CLI11_COMMIT_HASH "5cb3efabce007c3a0230e4cc2e27da491c646b6c" ) #CLI11_COMMIT_HASH#
SET( genesis_COMMIT_HASH "91b2221df7921f080d362bbca134708fd962c864" ) #genesis_COMMIT_HASH#
SET( genesis_COMMIT_HASH "5aeac184fc0b5d78b72e944309a8d688d85fc581" ) #genesis_COMMIT_HASH#

# Call the github download function, which takes four arguments:
# - LIBPATH : Path to the library dir where dependencies are stored.
Expand Down
1 change: 1 addition & 0 deletions src/options/variant_filter_region.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
#include "genesis/population/formats/genome_region_reader.hpp"
#include "genesis/population/formats/gff_reader.hpp"
#include "genesis/population/formats/map_bim_reader.hpp"
#include "genesis/population/formats/vcf_common.hpp"
#include "genesis/population/formats/vcf_input_stream.hpp"
#include "genesis/population/functions/filter_transform.hpp"
#include "genesis/population/functions/functions.hpp"
Expand Down
15 changes: 10 additions & 5 deletions src/options/variant_transform_subsample.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,16 +48,21 @@ void VariantTransformSubsampleOptions::add_subsample_opts_to_app(
);

// Subsample max coverage option.
// See https://www.kofler.or.at/bioinformatic/wp-content/uploads/2018/07/pooledAnalysis_part1.pdf
max_coverage_.option = sub->add_option(
"--subsample-max-coverage",
max_coverage_.value,
"If provided, the nucleotide counts of each sample are subsampled so that they do not "
"exceed this given maximum total coverage (sum of the four nucleotide counts). "
"exceed this given maximum total coverage (sum of the four nucleotide counts, as well as "
"any `N` and deleted `D` counts). "
"If they are below this value anyway, they are not changed. "
"This transformation is useful to limit the maximum coverage. For instance, the diversity "
"estimators for Theta Pi and Theta Watterson have terms that depend on coverage. "
"In particular when merging samples such as with `--sample-group-merge-table-file`, "
"having an upper limit can hence avoid long compute times."
"having an upper limit can hence avoid long compute times. "
"Furthermore, a very low Tajima's D, usually indicative of a selective sweep, may be found "
"as an artifact in highly covered regions, as such regions have just more sequencing errors. "
"To avoid these kinds of biases we recommend to subsample to a uniform coverage. "
// "This transformation is applied after any filters, so that, e.g., filters high coverage "
// "remove any unwanted positions first. See `--subsample-method` for the subsampling method."
);
Expand Down Expand Up @@ -110,21 +115,21 @@ void VariantTransformSubsampleOptions::add_subsample_transformation(
if( method == "subscale" ) {
variant_input.add_combined_filter_and_transforms(
[ max_coverage ]( Variant& variant ){
transform_subscale( variant, max_coverage );
subscale_counts( variant, max_coverage );
return true;
}
);
} else if( method == "subsample-with-replacement" ) {
variant_input.add_combined_filter_and_transforms(
[ max_coverage ]( Variant& variant ){
transform_subsample_with_replacement( variant, max_coverage );
subsample_counts_with_replacement( variant, max_coverage );
return true;
}
);
} else if( method == "subsample-without-replacement" ) {
variant_input.add_combined_filter_and_transforms(
[ max_coverage ]( Variant& variant ){
transform_subsample_without_replacement( variant, max_coverage );
subsample_counts_without_replacement( variant, max_coverage );
return true;
}
);
Expand Down

0 comments on commit 57b8f67

Please sign in to comment.