From 1008a6b46128af6b8fc41898606058997650f9be Mon Sep 17 00:00:00 2001 From: Lennart Date: Mon, 22 Oct 2018 17:31:11 +0200 Subject: [PATCH] included force gender argument during predict -- made z-score robust vs (possible?) issue --- README.md | 8 ++++---- wisecondorX/main.py | 7 +++++++ wisecondorX/overall_tools.py | 2 +- 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index dcbfb32..b232904 100755 --- a/README.md +++ b/README.md @@ -50,11 +50,10 @@ There are three main stages (converting, reference creating & predicting) for us - Create a reference (using reference .npz files) - **Important notes** - WisecondorX will internally generate a male and female gonosomal reference. It is advised that both male and female - samples are represented in the reference set. - - It is also advised that both male and female samples (for NIPT, this means male and female feti) are (more or less) - equally included. + samples (for NIPT, this means male and female feti) are represented in the reference set. Furthermore, they + should be more or less equally included. - Gender prediction is based on a Gaussian mixture model of the Y-read fraction. This means that, if only few samples (<30) - are included during reference creation, this process will not be accurate, and gonosomal predictions could fail. + are included during reference creation, this process might not be accurate, and gonosomal predictions could fail. - It is of paramount importance that the reference set consists of exclusively healthy samples that originate from the same sequencer, mapper, reference genome, type of material, ... etc, as the test samples. As a rule of thumb, think of all laboratory and in silico pre-processing steps: the more sources of bias that can be omitted, @@ -108,6 +107,7 @@ WisecondorX predict test_input.npz reference_input.npz output_id [--optional arg `--alpha x` | P-value cut-off for calling a CBS breakpoint (default: x=1e-4) `--beta x` | Number between 0 and 1, defines the linear trade-off between sensitivity and specificity for aberration calling. If beta=0, all segments will be called as aberrations. If beta=1, the cut-off (at copy number 1.5 and 2.5) is optimized to capture all constitutional aberrations (default: x=0.1) `--blacklist x` | Blacklist that masks additional regions in output, requires header-less .bed file. This is particularly useful when the reference set is a too small to recognize some obvious regions (such as centromeres; example at `./example.blacklist/centromere.hg38.txt`) (default: x=None) +`--gender x` | Force WisecondorX to analyze this file as a male (M) or female (F). Useful when dealing with a whole-chromosome Y-deletion (choices: x=F or x=M) `--bed` | Outputs tab-delimited .bed files (trisomy 21 NIPT example at `./example.bed`), containing all necessary information **(\*)** `--plot` | Outputs custom .png plots (trisomy 21 NIPT example at `./example.plot`), directly interpretable **(\*)** diff --git a/wisecondorX/main.py b/wisecondorX/main.py index 2b82d3b..8a47fc6 100755 --- a/wisecondorX/main.py +++ b/wisecondorX/main.py @@ -145,6 +145,9 @@ def tool_test(args): else: actual_gender = 'F' + if args.gender: + actual_gender = args.gender + ref_gender = actual_gender logging.info('Normalizing autosomes ...') @@ -346,6 +349,10 @@ def main(): default=None, help='Blacklist that masks regions in output, structure of header-less ' 'file: chr...(/t)startpos(/t)endpos(/n)') + parser_test.add_argument('--gender', + type=str, + choices=["F", "M"], + help='Force WisecondorX to analyze this case as a male (M) or a female (F)') parser_test.add_argument('--bed', action='store_true', help='Outputs tab-delimited .bed files, containing the most important information') diff --git a/wisecondorX/overall_tools.py b/wisecondorX/overall_tools.py index febe093..d7d5881 100644 --- a/wisecondorX/overall_tools.py +++ b/wisecondorX/overall_tools.py @@ -92,7 +92,7 @@ def get_z_score(results_c, results): segment_w = results_w[segment[0]][segment[1]:segment[2]] segment_w = [segment_w[i] for i in range(len(segment_w)) if segment_rr[i] != 0] null_segments = [np.ma.average(x, weights=segment_w) for x in np.transpose(segment_nr)] - zs.append(segment[3] / np.std(null_segments)) + zs.append(segment[3] / np.std([x for x in null_segments if np.isfinite(x)])) return zs