From 1008a6b46128af6b8fc41898606058997650f9be Mon Sep 17 00:00:00 2001
From: Lennart <leraman@gastn070b1.private.ugent.be>
Date: Mon, 22 Oct 2018 17:31:11 +0200
Subject: [PATCH] included force gender argument during predict -- made z-score
 robust vs (possible?) issue

---
 README.md                    | 8 ++++----
 wisecondorX/main.py          | 7 +++++++
 wisecondorX/overall_tools.py | 2 +-
 3 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/README.md b/README.md
index dcbfb32..b232904 100755
--- a/README.md
+++ b/README.md
@@ -50,11 +50,10 @@ There are three main stages (converting, reference creating & predicting) for us
 - Create a reference (using reference .npz files)  
     - **Important notes**
         - WisecondorX will internally generate a male and female gonosomal reference. It is advised that both male and female
-        samples are represented in the reference set.  
-        - It is also advised that both male and female samples (for NIPT, this means male and female feti) are (more or less)
-        equally included.  
+        samples (for NIPT, this means male and female feti) are represented in the reference set. Furthermore, they
+        should be more or less equally included.  
         - Gender prediction is based on a Gaussian mixture model of the Y-read fraction. This means that, if only few samples (<30)
-        are included during reference creation, this process will not be accurate, and gonosomal predictions could fail.  
+        are included during reference creation, this process might not be accurate, and gonosomal predictions could fail.  
         - It is of paramount importance that the reference set consists of exclusively healthy samples that originate from
         the same sequencer, mapper, reference genome, type of material, ... etc, as the test samples. As a rule of thumb,
         think of all laboratory and in silico pre-processing steps: the more sources of bias that can be omitted,
@@ -108,6 +107,7 @@ WisecondorX predict test_input.npz reference_input.npz output_id [--optional arg
 `--alpha x` | P-value cut-off for calling a CBS breakpoint (default: x=1e-4)  
 `--beta x` | Number between 0 and 1, defines the linear trade-off between sensitivity and specificity for aberration calling. If beta=0, all segments will be called as aberrations. If beta=1, the cut-off (at copy number 1.5 and 2.5) is optimized to capture all constitutional aberrations (default: x=0.1)  
 `--blacklist x` | Blacklist that masks additional regions in output, requires header-less .bed file. This is particularly useful when the reference set is a too small to recognize some obvious regions (such as centromeres; example at `./example.blacklist/centromere.hg38.txt`) (default: x=None)  
+`--gender x` | Force WisecondorX to analyze this file as a male (M) or female (F). Useful when dealing with a whole-chromosome Y-deletion (choices: x=F or x=M)
 `--bed` | Outputs tab-delimited .bed files (trisomy 21 NIPT example at `./example.bed`), containing all necessary information  **(\*)**
 `--plot` | Outputs custom .png plots (trisomy 21 NIPT example at `./example.plot`), directly interpretable  **(\*)**  
 
diff --git a/wisecondorX/main.py b/wisecondorX/main.py
index 2b82d3b..8a47fc6 100755
--- a/wisecondorX/main.py
+++ b/wisecondorX/main.py
@@ -145,6 +145,9 @@ def tool_test(args):
     else:
         actual_gender = 'F'
 
+    if args.gender:
+        actual_gender = args.gender
+
     ref_gender = actual_gender
 
     logging.info('Normalizing autosomes ...')
@@ -346,6 +349,10 @@ def main():
                              default=None,
                              help='Blacklist that masks regions in output, structure of header-less '
                                   'file: chr...(/t)startpos(/t)endpos(/n)')
+    parser_test.add_argument('--gender',
+                             type=str,
+                             choices=["F", "M"],
+                             help='Force WisecondorX to analyze this case as a male (M) or a female (F)')
     parser_test.add_argument('--bed',
                              action='store_true',
                              help='Outputs tab-delimited .bed files, containing the most important information')
diff --git a/wisecondorX/overall_tools.py b/wisecondorX/overall_tools.py
index febe093..d7d5881 100644
--- a/wisecondorX/overall_tools.py
+++ b/wisecondorX/overall_tools.py
@@ -92,7 +92,7 @@ def get_z_score(results_c, results):
         segment_w = results_w[segment[0]][segment[1]:segment[2]]
         segment_w = [segment_w[i] for i in range(len(segment_w)) if segment_rr[i] != 0]
         null_segments = [np.ma.average(x, weights=segment_w) for x in np.transpose(segment_nr)]
-        zs.append(segment[3] / np.std(null_segments))
+        zs.append(segment[3] / np.std([x for x in null_segments if np.isfinite(x)]))
     return zs