From 896ce583c1c5b069d50e3e1c327877a6b70a601c Mon Sep 17 00:00:00 2001 From: Lucas Czech Date: Thu, 6 Jun 2024 14:11:27 +0200 Subject: [PATCH] Refine low min read depth handling for Theta --- CMakeLists.txt | 2 +- doc/md/diversity.md | 7 ++++++- libs/genesis | 2 +- src/commands/analyze/diversity.cpp | 6 +++--- 4 files changed, 11 insertions(+), 6 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 303ee9b..6436067 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -168,7 +168,7 @@ include( "${CMAKE_CURRENT_LIST_DIR}/tools/cmake/DownloadDependency.cmake" ) # These are replaced by tools/cmake/update_dependencies.sh to the hashes that are currently checked out. # Thus, do not replace the hashes manually! SET( CLI11_COMMIT_HASH "5cb3efabce007c3a0230e4cc2e27da491c646b6c" ) #CLI11_COMMIT_HASH# -SET( genesis_COMMIT_HASH "1a0a8d9550a9f21900dab7169eca9aea55cd3cf0" ) #genesis_COMMIT_HASH# +SET( genesis_COMMIT_HASH "44c6d2d5a6d2940555d15d455065792a7a720c8a" ) #genesis_COMMIT_HASH# # Call the github download function, which takes four arguments: # - LIBPATH : Path to the libracy dir where dependencies are stored. diff --git a/doc/md/diversity.md b/doc/md/diversity.md index e147bef..f45e1c4 100644 --- a/doc/md/diversity.md +++ b/doc/md/diversity.md @@ -14,4 +14,9 @@ As described there, we recommend to be careful when numerically interpreting Taj The estimators use the following filter settings internally as well as part of their corrections: `--filter-sample-min-count`, `--filter-sample-min-read-depth`, and `--filter-sample-max-read-depth`. See the equations document above for details on how these parameters influence the estimators. Note that the `total` filters are applied for filtering here as well (as far as provided), but have no influence on the estimators themselves. We offer them here for convenience only. -The values per window are computed using the window averaging as described [here](../wiki/Windowing#window-averaging-policy), in order to scale the results per base pair. See [Output](../wiki/Output) for a description of the columns of the produced table. +The `--filter-sample-min-count` has to be set to exactly 2 when computing Tajima's D. This is a consequence of the equations, which only work out when exactly filtering out singleton counts, according to Kofler et al. Furthermore, the correction terms for Theta Pi and Theta Watterson make use of the `--filter-sample-min-read-depth`, by computing a sum over values in that range. Due to the exact way that this correction works, it is recommended to use a minimum read depth of at least twice the minimum per-base count: + + --filter-sample-min-count 2 + --filter-sample-min-read-depth 4 + +Lastly, the values per window are computed using the window averaging as described [here](../wiki/Windowing#window-averaging-policy), in order to scale the results per base pair. See [Output](../wiki/Output) for a description of the columns of the produced table. diff --git a/libs/genesis b/libs/genesis index 1a0a8d9..44c6d2d 160000 --- a/libs/genesis +++ b/libs/genesis @@ -1 +1 @@ -Subproject commit 1a0a8d9550a9f21900dab7169eca9aea55cd3cf0 +Subproject commit 44c6d2d5a6d2940555d15d455065792a7a720c8a diff --git a/src/commands/analyze/diversity.cpp b/src/commands/analyze/diversity.cpp index f16780f..23522e3 100644 --- a/src/commands/analyze/diversity.cpp +++ b/src/commands/analyze/diversity.cpp @@ -212,9 +212,9 @@ DiversityCommandState prepare_output_data_( throw CLI::ValidationError( options.filter_numerical.sample_min_count.option->get_name(), "The pool-seq corrected computation of Tajima's D requires the minimum allele count " - "to be exactly 2, according to Kofler et al. In case 2 is insufficient, " - "we recommend to subsample the reads to a smaller read depth. " - "Alternatively, deactivate the compuation of Tajima's D." + "to be exactly 2, i.e., exclusing singletons, according to Kofler et al. " + "In case 2 is insufficient, we recommend to subsample the reads to a smaller " + "read depth. Alternatively, deactivate the compuation of Tajima's D." ); } if( options.filter_numerical.sample_min_read_depth.value == 0 ) {