-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
14 changed files
with
1,587 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
# Test | ||
|
||
This directory contains test cases that are mostly meant for internal testing. However, feel free to have a look if you are interested. | ||
|
||
We here run grenedalf as well as our minimalistic independent implementation of the equations in Python, and compare the two to each other. As they both yield the same results, we do have some confidence that the equations are correctly implemented. The tests span a range of pool sizes, read depths, allele frequencies, and window sizes. However, this is a minimalistic test with clean data, i.e., we assume sufficient read depth and no missing data in the test. Hence, if you encounter any issues on more realistic data, please report the [issue](https://github.com/lczech/grenedalf/issues). | ||
|
||
The Python dependencies for running the tests are listed in `conda.yaml` and `pip.txt`, for the two package manages. Use these to install the packages needed to run the tests. We also run these tests in our [CI Tests](https://github.com/lczech/grenedalf/actions) in GitHub Actions. | ||
|
||
By default, we do not test PoPoolation here, as that's not the scope of this test. We however have implemented this for completeness as well; if you want to run the test for PoPoolation, find the commented line containing `run_popoolation` at the very end of the `execute_test.py` script, and un-comment it. Furthermore, download the source code of [PoPoolation](https://sourceforge.net/projects/popoolation/) and [PoPoolation2](https://sourceforge.net/projects/popoolation2/), and place both as sub-directories in `test/popoolation`, named `popoolation` and `popoolation2`, respectively. | ||
|
||
The test scripts are also published in the supporting grenedalf manuscript repository, see [here](https://github.com/lczech/grenedalf-paper/tree/master/eval-independent-test). That repository contains the scripts for all tests and benchmarks that we ran for the manuscript, in particular for assessing correctness and performance of grenedalf. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
#!/bin/bash | ||
|
||
# Remove the generated data files | ||
rm -r mpileup | ||
rm -r sync | ||
rm -r poolsizes | ||
|
||
# Remove the run files | ||
rm -r grenedalf/diversity | ||
rm -r grenedalf/fst | ||
rm -r grenedalf/logs | ||
rm -r popoolation/diversity | ||
rm -r popoolation/fst | ||
rm -r popoolation/logs | ||
|
||
# Remove the test result files | ||
rm -r test_results.tsv | ||
rm -r figures_* | ||
|
||
# Also remove unnecessary py stuff | ||
rm -r __pycache__ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
name: grenedalf_test | ||
channels: | ||
- defaults | ||
- conda-forge | ||
dependencies: | ||
- python=3.8 | ||
- matplotlib | ||
- numpy | ||
- pandas | ||
- scipy | ||
- seaborn | ||
- tqdm | ||
- pip | ||
- pip: | ||
- typing-extensions |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,122 @@ | ||
#!/usr/bin/env python3 | ||
|
||
import sys, os | ||
import pandas as pd | ||
import tqdm | ||
|
||
os.makedirs("mpileup", exist_ok=True) | ||
os.makedirs("sync", exist_ok=True) | ||
|
||
chromosome = "chr1" | ||
ref_allele = 'A' | ||
derived_allele = 'C' | ||
|
||
# ------------------------------------------------------------ | ||
# generate pileup | ||
# ------------------------------------------------------------ | ||
|
||
def generate_mpileup(file_id, coverage, derived_count, window_size): | ||
output_file = f"mpileup/{file_id}.pileup" | ||
# print(f"Generating mpileup file: {output_file}") | ||
|
||
with open(output_file, 'w') as f: | ||
for pos in range(1, window_size + 1): | ||
if pos == 1: | ||
cov1 = coverage - derived_count | ||
cov2 = derived_count | ||
else: | ||
cov1 = coverage | ||
cov2 = 0 | ||
bases = ref_allele * cov1 + derived_allele * cov2 | ||
qualities = 'I' * coverage | ||
f.write(f"{chromosome}\t{pos}\t{ref_allele}\t{coverage}\t{bases}\t{qualities}\n") | ||
|
||
def generate_mpileup_sample(coverage, derived_count, window_size): | ||
file_id = f"mpileup-cov_{coverage}-der_{derived_count}-win_{window_size}" | ||
generate_mpileup(file_id, coverage, derived_count, window_size) | ||
|
||
# ------------------------------------------------------------ | ||
# generate sync | ||
# ------------------------------------------------------------ | ||
|
||
def write_sync_sample( filehandle, coverage, derived_count ): | ||
a_cnt = coverage - derived_count | ||
t_cnt = derived_count | ||
filehandle.write(f"\t{a_cnt}:{t_cnt}:0:0:0:0") | ||
|
||
def generate_sync(file_id, coverage_1, coverage_2, derived_count_1, derived_count_2, window_size): | ||
output_file = f"sync/{file_id}.sync" | ||
# print(f"Generating sync file: {output_file}") | ||
|
||
with open(output_file, 'w') as f: | ||
for pos in range(1, window_size + 1): | ||
f.write(f"{chromosome}\t{pos}\t{ref_allele}") | ||
if pos == 1: | ||
write_sync_sample( f, coverage_1, derived_count_1 ) | ||
write_sync_sample( f, coverage_2, derived_count_2 ) | ||
else: | ||
write_sync_sample( f, coverage_1, 0 ) | ||
write_sync_sample( f, coverage_2, 0 ) | ||
f.write("\n") | ||
|
||
def generate_sync_sample(coverage_1, coverage_2, derived_count_1, derived_count_2, window_size): | ||
file_id = f"sync-cov1_{coverage_1}-cov2_{coverage_2}-der1_{derived_count_1}-der2_{derived_count_2}-win_{window_size}" | ||
generate_sync(file_id, coverage_1, coverage_2, derived_count_1, derived_count_2, window_size) | ||
|
||
# ------------------------------------------------------------ | ||
# read table | ||
# ------------------------------------------------------------ | ||
|
||
def create_from_df(df): | ||
# Iterate over each row | ||
for _, row in df.iterrows(): | ||
generate_mpileup_sample( | ||
int(row['coverage_1']), | ||
int(row['derived_count_1']), | ||
int(row['window_size']) | ||
) | ||
generate_mpileup_sample( | ||
int(row['coverage_2']), | ||
int(row['derived_count_2']), | ||
int(row['window_size']) | ||
) | ||
generate_sync_sample( | ||
int(row['coverage_1']), | ||
int(row['coverage_2']), | ||
int(row['derived_count_1']), | ||
int(row['derived_count_2']), | ||
int(row['window_size']) | ||
) | ||
|
||
def create_from_param_space(param_space): | ||
print("Creating files") | ||
for params in tqdm.tqdm(param_space): | ||
n1, n2, c1, c2, k1, k2, w = params | ||
|
||
generate_mpileup_sample( | ||
int(c1), | ||
int(k1), | ||
int(w) | ||
) | ||
generate_mpileup_sample( | ||
int(c2), | ||
int(k2), | ||
int(w) | ||
) | ||
generate_sync_sample( | ||
int(c1), | ||
int(c2), | ||
int(k1), | ||
int(k2), | ||
int(w) | ||
) | ||
|
||
def main(): | ||
# Read the tab-delimited table using pandas | ||
infile = "independent_check_statistics.tsv" | ||
df = pd.read_csv(infile, delimiter='\t') | ||
# print(df) | ||
create_from_df(df) | ||
|
||
if __name__ == '__main__': | ||
main() |
Oops, something went wrong.