From 7ff7cbc5c2b663796bc6a419145ed34136ace4c0 Mon Sep 17 00:00:00 2001
From: Lennart <leraman@arraycgh.ugent.be>
Date: Tue, 2 Jul 2019 15:29:10 +0200
Subject: [PATCH 1/3] Make weights and z-scores robust to small references (#37)

---
 wisecondorX/main.py          | 5 +++++
 wisecondorX/newref_tools.py  | 3 ++-
 wisecondorX/overall_tools.py | 4 +++-
 wisecondorX/predict_tools.py | 2 +-
 4 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/wisecondorX/main.py b/wisecondorX/main.py
index c7922f0..2281643 100755
--- a/wisecondorX/main.py
+++ b/wisecondorX/main.py
@@ -201,6 +201,11 @@ def tool_test(args):
     results_w = np.append(results_w * np.nanmedian(results_w_2),
                           results_w_2 * np.nanmedian(results_w))
     results_w = results_w / np.nanmedian(results_w)
+
+    if np.isnan(results_w).any() or np.isinf(results_w).any():
+        logging.warning('Non-numeric values found in weights -- reference too small. Circular binary segmentation will be unweighted')
+        results_w = np.ones(len(results_w))
+
     ref_sizes = np.append(ref_sizes, ref_sizes_2)
 
     null_ratios = np.array(
diff --git a/wisecondorX/newref_tools.py b/wisecondorX/newref_tools.py
index 2a8b534..ccf698a 100644
--- a/wisecondorX/newref_tools.py
+++ b/wisecondorX/newref_tools.py
@@ -45,6 +45,8 @@ def train_gender_model(samples):
 
     cut_off = gmm_x[local_min_i][0]
 
+    # plot(cut_off)
+
     genders[y_fractions > cut_off] = 'M'
     genders[y_fractions < cut_off] = 'F'
 
@@ -181,7 +183,6 @@ def get_reference(pca_corrected_data, masked_bins_per_chr, masked_bins_per_chr_c
             ref = sample[index_array[bin_i - start_num]]
             r = np.log2(sample[bin_i] / np.median(ref))
             null_ratio_array[bin_i - start_num][null_i] = r
-
     return index_array, distance_array, null_ratio_array
 
 
diff --git a/wisecondorX/overall_tools.py b/wisecondorX/overall_tools.py
index 8e48498..ef9392c 100644
--- a/wisecondorX/overall_tools.py
+++ b/wisecondorX/overall_tools.py
@@ -94,7 +94,9 @@ def get_z_score(results_c, results):
         null_segments = [np.ma.average(x, weights=segment_w) for x in np.transpose(segment_nr)]
         null_mean = np.ma.mean([x for x in null_segments if np.isfinite(x)])
         null_sd = np.ma.std([x for x in null_segments if np.isfinite(x)])
-        zs.append((segment[3] - null_mean) / null_sd)
+        z = (segment[3] - null_mean) / null_sd
+        z = min(z, 1000) ; z = max(z, -1000)
+        zs.append(z)
     return zs
 
 
diff --git a/wisecondorX/predict_tools.py b/wisecondorX/predict_tools.py
index bc47901..d659c52 100644
--- a/wisecondorX/predict_tools.py
+++ b/wisecondorX/predict_tools.py
@@ -137,7 +137,7 @@ def _normalize_once(test_data, test_copy, ref_file, optimal_cutoff, ct, cp, ap):
 '''
 The means of sets of within-sample reference
 distances can serve as inverse weights for
-CBS and Stouffer's z-scoring.
+CBS, Z-scoring and plotting.
 '''
 
 

From 68a044bc1aaf38b7bdc2c4959acb6a332cd2a5bc Mon Sep 17 00:00:00 2001
From: Lennart <leraman@arraycgh.ugent.be>
Date: Tue, 2 Jul 2019 15:30:08 +0200
Subject: [PATCH 2/3] Bump version to 1.1.2

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index b6d79c2..c1bf6ec 100644
--- a/setup.py
+++ b/setup.py
@@ -1,7 +1,7 @@
 #! /usr/bin/env python
 from setuptools import setup, find_packages
 
-version = '1.1.1'
+version = '1.1.2'
 dl_version = 'master' if 'dev' in version else '{}'.format(version)
 
 setup(

From 9fd8baae6893fb6dec2f820a534561a97b20b41a Mon Sep 17 00:00:00 2001
From: Lennart <leraman@arraycgh.ugent.be>
Date: Tue, 2 Jul 2019 15:31:59 +0200
Subject: [PATCH 3/3] Correct warning message: z-scoring is also unweighted

---
 wisecondorX/main.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/wisecondorX/main.py b/wisecondorX/main.py
index 2281643..2547ed2 100755
--- a/wisecondorX/main.py
+++ b/wisecondorX/main.py
@@ -203,7 +203,7 @@ def tool_test(args):
     results_w = results_w / np.nanmedian(results_w)
 
     if np.isnan(results_w).any() or np.isinf(results_w).any():
-        logging.warning('Non-numeric values found in weights -- reference too small. Circular binary segmentation will be unweighted')
+        logging.warning('Non-numeric values found in weights -- reference too small. Circular binary segmentation and z-scoring will be unweighted')
         results_w = np.ones(len(results_w))
 
     ref_sizes = np.append(ref_sizes, ref_sizes_2)