From 001b5a3fae333cab5a2a939c3902805a1b0483a1 Mon Sep 17 00:00:00 2001 From: Sahraeian Date: Sat, 26 Jan 2019 19:09:31 -0800 Subject: [PATCH 1/7] fixes for pytorch1 --- neusomatic/python/call.py | 3 +-- neusomatic/python/dataloader.py | 6 ++++-- neusomatic/python/train.py | 13 ++++++------- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/neusomatic/python/call.py b/neusomatic/python/call.py index d66bda9..d6d02c3 100755 --- a/neusomatic/python/call.py +++ b/neusomatic/python/call.py @@ -383,8 +383,7 @@ def call_neusomatic(candidates_tsv, ref_file, out_dir, checkpoint, num_threads, vartype_classes = ['DEL', 'INS', 'NONE', 'SNP'] data_transform = transforms.Compose( - [transforms.ToTensor(), - transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]) + [transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]) num_channels = 119 if ensemble else 26 net = NeuSomaticNet(num_channels) if use_cuda: diff --git a/neusomatic/python/dataloader.py b/neusomatic/python/dataloader.py index a976997..35ee0ee 100755 --- a/neusomatic/python/dataloader.py +++ b/neusomatic/python/dataloader.py @@ -388,9 +388,11 @@ def __getitem__(self, index): non_transformed_matrix = np.array(orig_matrix).astype(np.uint8) else: non_transformed_matrix = [] - + + matrix = torch.from_numpy(matrix.transpose((2, 0, 1))) + matrix = matrix.float().div(255) if self.transform is not None: - matrix = self.transform(matrix) + matrix = self.transform(matrix) var_pos = [length, center] var_pos = torch.Tensor(var_pos) diff --git a/neusomatic/python/train.py b/neusomatic/python/train.py index 3f86e07..3c97194 100755 --- a/neusomatic/python/train.py +++ b/neusomatic/python/train.py @@ -101,7 +101,7 @@ def test(net, epoch, validation_loader, use_cuda): for i in range(len(labels)): label = labels[i] - class_correct[label] += compare_labels[i] + class_correct[label] += compare_labels[i].data.cpu().numpy() class_total[label] += 1 for i in range(len(predicted)): label = predicted[i] @@ -110,7 +110,7 @@ def test(net, epoch, validation_loader, use_cuda): compare_len = (len_pred == var_len_s).squeeze() for i in range(len(var_len_s)): len_ = var_len_s[i] - len_class_correct[len_] += compare_len[i] + len_class_correct[len_] += compare_len[i].data.cpu().numpy() len_class_total[len_] += 1 for i in range(len(len_pred)): len_ = len_pred[i] @@ -186,8 +186,7 @@ def train_neusomatic(candidates_tsv, validation_candidates_tsv, out_dir, checkpo torch.set_num_threads(num_threads) data_transform = transforms.Compose( - [transforms.ToTensor(), - transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))] + [transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))] ) num_channels = 119 if ensemble else 26 net = NeuSomaticNet(num_channels) @@ -396,14 +395,14 @@ def train_neusomatic(candidates_tsv, validation_candidates_tsv, out_dir, checkpo if use_cuda: var_len_labels = var_len_labels.cuda() loss = criterion_crossentropy(outputs_classification, labels) + 1 * criterion_smoothl1( - outputs_pos, var_pos_s[:, 1] + outputs_pos.squeeze(1), var_pos_s[:, 1] ) + 1 * criterion_crossentropy2(outputs_len, var_len_labels) loss.backward() optimizer.step() - loss_s.append(loss.data[0]) + loss_s.append(loss.data) - running_loss += loss.data[0] + running_loss += loss.data if i_ % print_freq == print_freq - 1: logger.info('epoch: {}, i: {:>5}, lr: {}, loss: {:.5f}'.format( epoch + 1 + prev_epochs, i_ + 1, From 0c3b8b8f699cb54c4c41afa4265d9956e6497cd1 Mon Sep 17 00:00:00 2001 From: Sahraeian Date: Wed, 30 Jan 2019 12:17:03 -0800 Subject: [PATCH 2/7] Convert to Python 3 --- 
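Patch note (illustration only, not part of the commit message): the hunks below repeat a few mechanical Python 2 to 3 conversions: wrapping the now-lazy map()/filter() iterators in list(), floor division with //, dict.items() in place of iteritems(), binary-mode pickle I/O, and unpacking tuple parameters inside the function body. The following minimal sketch shows each idiom with hypothetical values; names such as worker and example.idx are illustrative and not from the code base, while pred_vcf_records_path and the centering-offset expression are taken from the hunks themselves.
```
import pickle

# Python 3: map()/filter() return lazy iterators; wrap them in list() when
# the result is indexed, reused, or concatenated (as done throughout call.py,
# dataloader.py, filter_candidates.py, and generate_dataset.py below).
afs = list(map(float, ["0.10", "0.25", "0.40"]))
kept = list(filter(lambda x: x >= 0.2, afs))

# Python 3: "/" is true division; use "//" wherever an integer index or
# width is required, e.g. centering offsets like (matrix_width - ncols) // 2.
offset = (32 - 20) // 2

# Python 3: dict.iteritems() is gone; dict.items() replaces it.
counts = {"SNP": 3, "INS": 1, "DEL": 2}
for vartype, n in counts.items():
    print(vartype, n)

# Python 3 removed tuple parameters (def f((a, b))); multiprocessing workers
# now take a single record and unpack it in the body, as done for
# pred_vcf_records_path, extract_info_tsv, filter_candidates, and find_records.
def worker(record):  # hypothetical worker mirroring the pattern in the patch
    tag, value = record
    return tag, value

# Python 3 pickles are bytes: open index files in binary mode ("wb"/"rb").
with open("example.idx", "wb") as f:  # hypothetical file name
    pickle.dump([0, 17, 42], f)
with open("example.idx", "rb") as f:
    idx = pickle.load(f)
```
The same bytes-versus-str boundary also explains the explicit .encode('utf-8') on pybedtools otherfields and the .decode('ascii') on the base64 output in the generate_dataset.py hunks.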
README.md | 29 +- neusomatic/python/call.py | 47 ++-- neusomatic/python/dataloader.py | 19 +- .../python/extract_postprocess_targets.py | 4 +- neusomatic/python/filter_candidates.py | 39 +-- neusomatic/python/generate_dataset.py | 263 +++++++++--------- neusomatic/python/long_read_indelrealign.py | 114 ++++---- neusomatic/python/merge_tsvs.py | 10 +- neusomatic/python/network.py | 10 +- neusomatic/python/postprocess.py | 8 +- neusomatic/python/preprocess.py | 8 +- neusomatic/python/resolve_scores.py | 16 +- neusomatic/python/resolve_variants.py | 47 ++-- neusomatic/python/scan_alignments.py | 10 +- neusomatic/python/split_bed.py | 4 +- neusomatic/python/train.py | 34 +-- neusomatic/python/utils.py | 4 +- test/NeuSomatic_ensemble.vcf | 10 +- test/NeuSomatic_standalone.vcf | 10 +- 19 files changed, 350 insertions(+), 336 deletions(-) diff --git a/README.md b/README.md index 8b80d88..77c28d5 100644 --- a/README.md +++ b/README.md @@ -29,33 +29,32 @@ doi: https://doi.org/10.1101/393801](https://doi.org/10.1101/393801) ## Availability -NeuSomatic is written in Python and C++ and requires a Unix-like environment to run. It has been successfully tested on CentOS 7. Its deep learning framework is implemented using PyTorch 0.3.1 to enable GPU acceleration for training/testing. +NeuSomatic is written in Python and C++ and requires a Unix-like environment to run. It has been successfully tested on CentOS 7. Its deep learning framework is implemented using PyTorch 1.0.0 to enable GPU acceleration for training/testing. NeuSomatic first scans the genome to identify candidate variants and extract alignment information. -The binary for this step can be obtained in the `neusomatic/bin` folder by running `./build.sh` (which requires cmake 3.12.1 and g++ 5.4.0). +The binary for this step can be obtained in the `neusomatic/bin` folder by running `./build.sh` (which requires cmake 3.13.2 and g++ 5.4.0). 
Python 2.7 and the following Python packages must be installed: -* pytorch 0.3.1 -* torchvision 0.2.0 -* pybedtools 0.7.10 -* pysam 0.14.1 +* pytorch 1.0.0 +* torchvision 0.2.1 +* pybedtools 0.8.0 +* pysam 0.15.2 * zlib 1.2.11 -* numpy 1.14.3 -* scipy 1.1.0 -* biopython 1.68 +* numpy 1.15.4 +* scipy 1.2.0 +* biopython 1.73 It also depends on the following packages: * cuda80 1.0 (if you want to use GPU) -* tabix 0.2.5 +* tabix 0.2.6 * bedtools 2.27.1 -* samtools 1.7 +* samtools 1.9 You can install these packages using [anaconda](https://www.anaconda.com/download)/[miniconda](https://conda.io/miniconda.html) : ``` -conda install zlib=1.2.11 numpy=1.14.3 scipy=1.1.0 -conda install pytorch=0.3.1 torchvision=0.2.0 cuda80=1.0 -c pytorch -conda install cmake=3.12.1 -c conda-forge -conda install pysam=0.14.1 pybedtools=0.7.10 samtools=1.7 tabix=0.2.5 bedtools=2.27.1 biopython=1.68 -c bioconda +conda install zlib=1.2.11 numpy=1.15.4 scipy=1.2.0 cmake=3.13.2 +conda install pysam=0.15.2 pybedtools=0.8.0 samtools=1.9 tabix=0.2.6 bedtools=2.27.1 biopython=1.73 -c bioconda +conda install pytorch=1.0.0 torchvision=0.2.1 cuda80==1.0 -c pytorch ``` Then you can export the conda paths as: ``` diff --git a/neusomatic/python/call.py b/neusomatic/python/call.py index e5d3e71..5b945e0 100755 --- a/neusomatic/python/call.py +++ b/neusomatic/python/call.py @@ -79,31 +79,32 @@ def call_variants(net, vartype_classes, call_loader, out_dir, model_tag, use_cud imsave(file_name, non_transformed_matrices[i, :, :, 0:3]) true_path[path] = file_name final_preds[path] = [vartype_classes[predicted[i]], pos_pred[i], len_pred[i], - map(lambda x: round(x, 4), F.softmax( - outputs1[i, :], 0).data.cpu().numpy()), - map(lambda x: round(x, 4), F.softmax( - outputs3[i, :], 0).data.cpu().numpy()), - map(lambda x: round(x, 4), - outputs1.data.cpu()[i].numpy()), - map(lambda x: round(x, 4), - outputs3.data.cpu()[i].numpy())] + list(map(lambda x: round(x, 4), F.softmax( + outputs1[i, :], 0).data.cpu().numpy())), + list(map(lambda x: round(x, 4), F.softmax( + outputs3[i, :], 0).data.cpu().numpy())), + list(map(lambda x: round(x, 4), + outputs1.data.cpu()[i].numpy())), + list(map(lambda x: round(x, 4), + outputs3.data.cpu()[i].numpy()))] else: none_preds[path] = [vartype_classes[predicted[i]], pos_pred[i], len_pred[i], - map(lambda x: round(x, 4), F.softmax( - outputs1[i, :], 0).data.cpu().numpy()), - map(lambda x: round(x, 4), F.softmax( - outputs3[i, :], 0).data.cpu().numpy()), - map(lambda x: round(x, 4), - outputs1.data.cpu()[i].numpy()), - map(lambda x: round(x, 4), - outputs3.data.cpu()[i].numpy())] + list(map(lambda x: round(x, 4), F.softmax( + outputs1[i, :], 0).data.cpu().numpy())), + list(map(lambda x: round(x, 4), F.softmax( + outputs3[i, :], 0).data.cpu().numpy())), + list(map(lambda x: round(x, 4), + outputs1.data.cpu()[i].numpy())), + list(map(lambda x: round(x, 4), + outputs3.data.cpu()[i].numpy()))] if (iii % 10 == 0): logger.info("Called {} candidates in this batch.".format(j)) logger.info("Called {} candidates in this batch.".format(j)) return final_preds, none_preds, true_path -def pred_vcf_records_path((path, true_path_, pred_all, chroms, vartype_classes, ref_file)): +def pred_vcf_records_path(record): + path, true_path_, pred_all, chroms, vartype_classes, ref_file = record thread_logger = logging.getLogger( "{} ({})".format(pred_vcf_records_path.__name__, multiprocessing.current_process().name)) try: @@ -196,7 +197,7 @@ def pred_vcf_records_path((path, true_path_, pred_all, chroms, vartype_classes, b = (anchor[0] - 
col_2_pos[anchor[1]]) for i in nzref_pos: col_2_pos[i] += b - pos_2_col = {v: k for k, v in col_2_pos.iteritems()} + pos_2_col = {v: k for k, v in col_2_pos.items()} if abs(center_pred - center) < too_far_center: if type_pred == "SNP": @@ -311,7 +312,7 @@ def pred_vcf_records(ref_file, final_preds, true_path, chroms, vartype_classes, if o is None: raise Exception("pred_vcf_records_path failed!") - all_vcf_records = filter(None, all_vcf_records) + all_vcf_records = list(filter(None, all_vcf_records)) return all_vcf_records @@ -348,7 +349,7 @@ def get_vcf_records(all_vcf_records): def write_vcf(vcf_records, output_vcf, chroms_order, pass_threshold, lowqual_threshold): logger = logging.getLogger(write_vcf.__name__) - vcf_records = filter(lambda x: len(x) > 0, vcf_records) + vcf_records = list(filter(lambda x: len(x) > 0, vcf_records)) vcf_records = sorted(vcf_records, key=lambda x: [chroms_order[x[0]], x[1]]) lines = [] with open(output_vcf, "w") as ov: @@ -417,10 +418,10 @@ def call_neusomatic(candidates_tsv, ref_file, out_dir, checkpoint, num_threads, # 1. filter out unnecessary keys # pretrained_state_dict = { # k: v for k, v in pretrained_state_dict.items() if k in model_dict} - if "module." in pretrained_state_dict.keys()[0] and "module." not in model_dict.keys()[0]: + if "module." in list(pretrained_state_dict.keys())[0] and "module." not in list(model_dict.keys())[0]: pretrained_state_dict = {k.split("module.")[1]: v for k, v in pretrained_state_dict.items( ) if k.split("module.")[1] in model_dict} - elif "module." not in pretrained_state_dict.keys()[0] and "module." in model_dict.keys()[0]: + elif "module." not in list(pretrained_state_dict.keys())[0] and "module." in list(model_dict.keys())[0]: pretrained_state_dict = { ("module." + k): v for k, v in pretrained_state_dict.items() if ("module." 
+ k) in model_dict} @@ -434,7 +435,7 @@ def call_neusomatic(candidates_tsv, ref_file, out_dir, checkpoint, num_threads, Ls = [] for candidate_file in candidates_tsv: - idx = pickle.load(open(candidate_file + ".idx")) + idx = pickle.load(open(candidate_file + ".idx", "rb")) Ls.append(len(idx) - 1) current_L = 0 diff --git a/neusomatic/python/dataloader.py b/neusomatic/python/dataloader.py index f77c337..06ece78 100755 --- a/neusomatic/python/dataloader.py +++ b/neusomatic/python/dataloader.py @@ -36,7 +36,7 @@ def candidate_loader_tsv(tsv, open_tsv, idx, i): tag = fields[2] im = extract_zlib(base64.b64decode(fields[3])) if len(fields) > 4: - anns = map(float, fields[4:]) + anns = list(map(float, fields[4:])) else: anns = [] label = type_class_dict[tag.split(".")[4]] @@ -45,7 +45,8 @@ def candidate_loader_tsv(tsv, open_tsv, idx, i): return tag, im, anns, label -def extract_info_tsv((i_b, tsv, idx, L, max_load_candidates, nclasses_t, nclasses_l)): +def extract_info_tsv(record): + i_b, tsv, idx, L, max_load_candidates, nclasses_t, nclasses_l = record thread_logger = logging.getLogger( "{} ({})".format(extract_info_tsv.__name__, multiprocessing.current_process().name)) try: @@ -89,7 +90,7 @@ def extract_info_tsv((i_b, tsv, idx, L, max_load_candidates, nclasses_t, nclasse im = extract_zlib(base64.b64decode(fields[3])) label = type_class_dict[tag.split(".")[4]] if len(fields) > 4: - anns = map(float, fields[4:]) + anns = list(map(float, fields[4:])) else: anns = [] data.append([tag, im, anns, label]) @@ -147,7 +148,7 @@ def __init__(self, roots, max_load_candidates, transform=None, self.tsvs.append(tsv) for t in range(num_threads): self.open_tsvs[t].append("") - idx = pickle.load(open(tsv + ".idx")) + idx = pickle.load(open(tsv + ".idx", "rb")) self.idxs.append(idx) self.Ls.append(len(idx) - 1) self.data = [] @@ -167,7 +168,7 @@ def __init__(self, roots, max_load_candidates, transform=None, Ls_ = [] for i_b, _ in batch: tsv = self.tsvs[i_b] - max_load_ = self.Ls[i_b] * max_load_candidates / \ + max_load_ = self.Ls[i_b] * max_load_candidates // \ total_L if total_L > 0 else 0 map_args.append([i_b, tsv, self.idxs[i_b], self.Ls[i_b], max_load_, nclasses_t, nclasses_l]) @@ -198,8 +199,8 @@ def __init__(self, roots, max_load_candidates, transform=None, for matrices, data, none_ids, var_ids, count_class_t, count_class_l in records_: self.matrices += matrices self.data += data - self.none_ids += map(lambda x: x + j, none_ids) - self.var_ids += map(lambda x: x + j, var_ids) + self.none_ids += list(map(lambda x: x + j, none_ids)) + self.var_ids += list(map(lambda x: x + j, var_ids)) for k in range(nclasses_t): self.count_class_t[k] += count_class_t[k] for k in range(nclasses_l): @@ -271,12 +272,12 @@ def __getitem__(self, index): h, w, c = matrix.shape r = random.rand() if r < 0.6: - x_left = random.randint((center - 2) * 2 / 3, center - 2) + x_left = random.randint((center - 2) * 2 // 3, center - 2) else: x_left = 0 if r > 0.4: x_right = random.randint( - (w - center - 2) * 2 / 3, w - center - 2) + (w - center - 2) * 2 // 3, w - center - 2) else: x_right = 0 if x_left > 0: diff --git a/neusomatic/python/extract_postprocess_targets.py b/neusomatic/python/extract_postprocess_targets.py index 1fa9c94..7547583 100755 --- a/neusomatic/python/extract_postprocess_targets.py +++ b/neusomatic/python/extract_postprocess_targets.py @@ -34,7 +34,7 @@ def extract_postprocess_targets(input_vcf, min_len, max_dist, pad): if not record_set: record_set.append(record) continue - if len(filter(lambda x: (chrom == x[0] and 
(abs(min(x[1] + len(x[2]), pos + len(ref)) - max(x[1], pos)) <= max_dist)), record_set)) > 0: + if len(list(filter(lambda x: (chrom == x[0] and (abs(min(x[1] + len(x[2]), pos + len(ref)) - max(x[1], pos)) <= max_dist)), record_set))) > 0: record_set.append(record) continue @@ -45,7 +45,7 @@ def extract_postprocess_targets(input_vcf, min_len, max_dist, pad): for ii, record_set in enumerate(record_sets): if len(record_set) > 1: - if filter(lambda x: len(x[2]) != len(x[3]), record_set): + if list(filter(lambda x: len(x[2]) != len(x[3]), record_set)): for x in record_set: fields = x[-1].strip().split() fields[2] = str(ii) diff --git a/neusomatic/python/filter_candidates.py b/neusomatic/python/filter_candidates.py index fd98e33..9c2c310 100755 --- a/neusomatic/python/filter_candidates.py +++ b/neusomatic/python/filter_candidates.py @@ -15,9 +15,10 @@ from utils import safe_read_info_dict -def filter_candidates((candidates_vcf, filtered_candidates_vcf, reference, dbsnp, min_dp, max_dp, good_ao, - min_ao, snp_min_af, snp_min_bq, snp_min_ao, ins_min_af, del_min_af, - del_merge_min_af, ins_merge_min_af, merge_r)): +def filter_candidates(candidate_record): + candidates_vcf, filtered_candidates_vcf, reference, dbsnp, min_dp, max_dp, good_ao, \ + min_ao, snp_min_af, snp_min_bq, snp_min_ao, ins_min_af, del_min_af, \ + del_merge_min_af, ins_merge_min_af, merge_r = candidate_record thread_logger = logging.getLogger( "{} ({})".format(filter_candidates.__name__, multiprocessing.current_process().name)) try: @@ -35,7 +36,7 @@ def filter_candidates((candidates_vcf, filtered_candidates_vcf, reference, dbsnp chrom, pos, _, ref, alt, _, _, info_, _, info = line.strip().split() pos = int(pos) loc = "{}.{}".format(chrom, pos) - dp, ro, ao = map(int, info.split(":")[1:4]) + dp, ro, ao = list(map(int, info.split(":")[1:4])) info_dict = dict(map(lambda x: x.split( "="), filter(None, info_.split(";")))) mq_ = safe_read_info_dict(info_dict, "MQ", int, -100) @@ -71,15 +72,15 @@ def filter_candidates((candidates_vcf, filtered_candidates_vcf, reference, dbsnp fasta_file = pysam.Fastafile(reference) good_records = [] dels = [] - for loc, rs in sorted(records.iteritems(), key=lambda x: x[1][0:2]) + \ + for loc, rs in sorted(records.items(), key=lambda x: x[1][0:2]) + \ [["", [["", 0, "", "", 0, 0, 0, ""]]]]: - ins = filter(lambda x: x[2] == "N", rs) + ins = list(filter(lambda x: x[2] == "N", rs)) if len(ins) > 1: # emit ins - afs = map(lambda x: x[6] / float(x[5] + x[6]), ins) + afs = list(map(lambda x: x[6] / float(x[5] + x[6]), ins)) max_af = max(afs) - ins = filter(lambda x: x[6] / float(x[5] + - x[6]) >= (max_af * merge_r), ins) + ins = list(filter(lambda x: x[6] / float(x[5] + + x[6]) >= (max_af * merge_r), ins)) chrom, pos, ref = ins[0][0:3] dp = max(map(lambda x: x[4], ins)) ro = max(map(lambda x: x[5], ins)) @@ -109,7 +110,7 @@ def filter_candidates((candidates_vcf, filtered_candidates_vcf, reference, dbsnp else: ins = [ins[0][:-1]] good_records.extend(ins) - if dels and (ins or filter(lambda x: x[3] != "N" and x[2] != "N", rs)): + if dels and (ins or list(filter(lambda x: x[3] != "N" and x[2] != "N", rs))): # emit del if len(dels) == 1: ro = dels[0][5] @@ -119,11 +120,11 @@ def filter_candidates((candidates_vcf, filtered_candidates_vcf, reference, dbsnp good_records.extend(dels) else: - afs = map(lambda x: x[6] / float(x[5] + x[6]), dels) + afs = list(map(lambda x: x[6] / float(x[5] + x[6]), dels)) max_af = max(afs) merge_r_thr = merge_r * max_af - dels = filter( - lambda x: x[6] / float(x[5] + x[6]) >= 
merge_r_thr, dels) + dels = list(filter( + lambda x: x[6] / float(x[5] + x[6]) >= merge_r_thr, dels)) chrom, pos = dels[0][0:2] dp = max(map(lambda x: x[4], dels)) ro = max(map(lambda x: x[5], dels)) @@ -172,12 +173,12 @@ def filter_candidates((candidates_vcf, filtered_candidates_vcf, reference, dbsnp if ao / float(ro + ao) >= ((del_min_af)): good_records.extend(dels) else: - afs = map(lambda x: x[6] / - float(x[5] + x[6]), dels) + afs = list(map(lambda x: x[6] / + float(x[5] + x[6]), dels)) max_af = max(afs) merge_r_thr = merge_r * max_af - dels = filter( - lambda x: x[6] / float(x[5] + x[6]) >= merge_r_thr, dels) + dels = list(filter( + lambda x: x[6] / float(x[5] + x[6]) >= merge_r_thr, dels)) chrom, pos = dels[0][0:2] dp = max(map(lambda x: x[4], dels)) ro = max(map(lambda x: x[5], dels)) @@ -277,8 +278,8 @@ def filter_candidates((candidates_vcf, filtered_candidates_vcf, reference, dbsnp non_in_dbsnp_ids.append(int(x[5])) for x in non_in_dbsnp_2: non_in_dbsnp_ids.append(int(x[5])) - final_records = map(lambda x: x[1], filter( - lambda x: x[0] in non_in_dbsnp_ids, enumerate(final_records))) + final_records = list(map(lambda x: x[1], filter( + lambda x: x[0] in non_in_dbsnp_ids, enumerate(final_records)))) with open(filtered_candidates_vcf, "w") as o_f: o_f.write("##fileformat=VCFv4.2\n") o_f.write( diff --git a/neusomatic/python/generate_dataset.py b/neusomatic/python/generate_dataset.py index 8525b36..04b4260 100755 --- a/neusomatic/python/generate_dataset.py +++ b/neusomatic/python/generate_dataset.py @@ -26,8 +26,8 @@ NUC_to_NUM = {"a": 1, "c": 2, "g": 3, "t": 4, "-": 5, " ": 0, "A": 1, "C": 2, "G": 3, "T": 4, "N": 5, "b": 1, "d": 2, "h": 3, "u": 4} snp = {"a": "b", "c": "d", "g": "h", "t": "u"} -NUM_to_NUC = {v: k for k, v in NUC_to_NUM.iteritems()} -NUM_to_NUC_hp = {v: k for k, v in NUC_to_NUM_hp.iteritems()} +NUM_to_NUC = {v: k for k, v in NUC_to_NUM.items()} +NUM_to_NUC_hp = {v: k for k, v in NUC_to_NUM_hp.items()} def get_type(ref, alt): @@ -115,14 +115,14 @@ def get_variant_matrix_tabix(ref_file, count_bed, record, matrix_base_pad, chrom col_pos_map[pos_] = cnt cnt += 1 curr_pos = pos_ + 1 - matrix_.append(map(int, rec[4].split(":"))) - bq_matrix_.append(map(int, rec[5].split(":"))) - mq_matrix_.append(map(int, rec[6].split(":"))) - st_matrix_.append(map(int, rec[7].split(":"))) - lsc_matrix_.append(map(int, rec[8].split(":"))) - rsc_matrix_.append(map(int, rec[9].split(":"))) + matrix_.append(list(map(int, rec[4].split(":")))) + bq_matrix_.append(list(map(int, rec[5].split(":")))) + mq_matrix_.append(list(map(int, rec[6].split(":")))) + st_matrix_.append(list(map(int, rec[7].split(":")))) + lsc_matrix_.append(list(map(int, rec[8].split(":")))) + rsc_matrix_.append(list(map(int, rec[9].split(":")))) for iii in range(len(tag_matrices_)): - tag_matrices_[iii].append(map(int, rec[10 + iii].split(":"))) + tag_matrices_[iii].append(list(map(int, rec[10 + iii].split(":")))) ref_array.append(NUC_to_NUM_tabix[ref_base]) if ref_base != "-" and pos_ not in col_pos_map: col_pos_map[pos_] = cnt @@ -189,9 +189,9 @@ def align_tumor_normal_matrices(record, tumor_matrix_, bq_tumor_matrix_, mq_tumo raise(RuntimeError( "tumor_col_pos_map and normal_col_pos_map have different keys.")) - pT = map(lambda x: tumor_col_pos_map[x], sorted(tumor_col_pos_map.keys())) - pN = map(lambda x: normal_col_pos_map[ - x], sorted(normal_col_pos_map.keys())) + pT = list(map(lambda x: tumor_col_pos_map[x], sorted(tumor_col_pos_map.keys()))) + pN = list(map(lambda x: normal_col_pos_map[ + x], 
sorted(normal_col_pos_map.keys()))) if pT[0] != pN[0]: logger.error("record: {}".format(record)) @@ -275,9 +275,9 @@ def align_tumor_normal_matrices(record, tumor_matrix_, bq_tumor_matrix_, mq_tumo new_normal_ref_array[map_N] = normal_ref_array new_tumor_col_pos_map = {k: map_T[v] - for k, v in tumor_col_pos_map.iteritems()} + for k, v in tumor_col_pos_map.items()} new_normal_col_pos_map = {k: map_N[v] - for k, v in normal_col_pos_map.iteritems()} + for k, v in normal_col_pos_map.items()} if sum(new_normal_ref_array - new_tumor_ref_array) != 0: logger.error("record: {}".format(record)) @@ -382,8 +382,8 @@ def prepare_info_matrices_tabix(ref_file, tumor_count_bed, normal_count_bed, rec a = [-1000] + sorted(np.where((ref_array == 0))[0]) + [1000] b = np.diff((np.diff(a) == 1).astype(int)) a = a[1:-1] - blocks = map(lambda x: [a[x[0]], a[x[1]]], zip( - np.where(b == 1)[0], np.where(b == -1)[0])) + blocks = list(map(lambda x: [a[x[0]], a[x[1]]], zip( + np.where(b == 1)[0], np.where(b == -1)[0]))) if blocks: largest_block = sorted( blocks, key=lambda x: x[1] - x[0] + 1)[-1] @@ -398,19 +398,19 @@ def prepare_info_matrices_tabix(ref_file, tumor_count_bed, normal_count_bed, rec ) - set(cols_not_to_del))) if n_col - len(cols_to_del) > tw: mn = min(count_column[np.argsort(count_column)][ - n_col - tw - 1], max(n_row / 5, min_ev_frac_per_col * n_row)) + n_col - tw - 1], max(n_row // 5, min_ev_frac_per_col * n_row)) new_cols_to_del = set(np.where(np.logical_and(count_column <= mn, (ref_array == 0)))[ 0]) - set(cols_to_del) - set(cols_not_to_del) if n_col - (len(cols_to_del) + len(new_cols_to_del)) < tw and len(new_cols_to_del) > 3 and len(alt) > len(ref): - new_cols_to_del = map( - lambda x: [count_column[x], x], new_cols_to_del) + new_cols_to_del = list(map( + lambda x: [count_column[x], x], new_cols_to_del)) new_cols_to_del = sorted(new_cols_to_del, key=lambda x: [x[0], x[1]])[ 0:len(new_cols_to_del) - 4] - new_cols_to_del = map(lambda x: x[1], new_cols_to_del) + new_cols_to_del = list(map(lambda x: x[1], new_cols_to_del)) cols_to_del = sorted(set(list(new_cols_to_del) + list(cols_to_del))) cols_to_del = list(set(cols_to_del)) if n_col - len(cols_to_del) > tw: - for i in range(n_col / 10): + for i in range(n_col // 10): if i not in cols_to_del and sum(rcenter) > -3: ref_b = ref_array[i] if tumor_matrix_[ref_b, i] > .8 * n_row: @@ -434,7 +434,7 @@ def prepare_info_matrices_tabix(ref_file, tumor_count_bed, normal_count_bed, rec set(range(largest_block[0], largest_block[1] + 1))))) cols_to_del.sort() - for i, v in col_pos_map.iteritems(): + for i, v in col_pos_map.items(): col_pos_map[i] -= sum(np.array(cols_to_del) < v) tumor_matrix = np.delete(tumor_matrix_, cols_to_del, 1) @@ -464,60 +464,60 @@ def prepare_info_matrices_tabix(ref_file, tumor_count_bed, normal_count_bed, rec ncols = tumor_matrix.shape[1] if matrix_width >= ncols: - col_pos_map = {i: v + (matrix_width - ncols) / - 2 for i, v in col_pos_map.iteritems()} + col_pos_map = {i: v + (matrix_width - ncols) // + 2 for i, v in col_pos_map.items()} tumor_count_matrix = np.zeros((5, matrix_width)) - tumor_count_matrix[:, (matrix_width - ncols) / - 2:(matrix_width - ncols) / 2 + ncols] = tumor_matrix + tumor_count_matrix[:, (matrix_width - ncols) // + 2:(matrix_width - ncols) // 2 + ncols] = tumor_matrix bq_tumor_count_matrix = np.zeros((5, matrix_width)) bq_tumor_count_matrix[ - :, (matrix_width - ncols) / 2:(matrix_width - ncols) / 2 + ncols] = bq_tumor_matrix + :, (matrix_width - ncols) // 2:(matrix_width - ncols) // 2 + ncols] = 
bq_tumor_matrix mq_tumor_count_matrix = np.zeros((5, matrix_width)) mq_tumor_count_matrix[ - :, (matrix_width - ncols) / 2:(matrix_width - ncols) / 2 + ncols] = mq_tumor_matrix + :, (matrix_width - ncols) // 2:(matrix_width - ncols) // 2 + ncols] = mq_tumor_matrix st_tumor_count_matrix = np.zeros((5, matrix_width)) st_tumor_count_matrix[ - :, (matrix_width - ncols) / 2:(matrix_width - ncols) / 2 + ncols] = st_tumor_matrix + :, (matrix_width - ncols) // 2:(matrix_width - ncols) // 2 + ncols] = st_tumor_matrix lsc_tumor_count_matrix = np.zeros((5, matrix_width)) lsc_tumor_count_matrix[ - :, (matrix_width - ncols) / 2:(matrix_width - ncols) / 2 + ncols] = lsc_tumor_matrix + :, (matrix_width - ncols) // 2:(matrix_width - ncols) // 2 + ncols] = lsc_tumor_matrix rsc_tumor_count_matrix = np.zeros((5, matrix_width)) rsc_tumor_count_matrix[ - :, (matrix_width - ncols) / 2:(matrix_width - ncols) / 2 + ncols] = rsc_tumor_matrix + :, (matrix_width - ncols) // 2:(matrix_width - ncols) // 2 + ncols] = rsc_tumor_matrix tag_tumor_count_matrices = [] for iii in range(len(tag_tumor_matrices)): tag_tumor_count_matrices.append(np.zeros((5, matrix_width))) tag_tumor_count_matrices[iii][ - :, (matrix_width - ncols) / 2:(matrix_width - ncols) / 2 + ncols] = tag_tumor_matrices[iii] + :, (matrix_width - ncols) // 2:(matrix_width - ncols) // 2 + ncols] = tag_tumor_matrices[iii] normal_count_matrix = np.zeros((5, matrix_width)) normal_count_matrix[ - :, (matrix_width - ncols) / 2:(matrix_width - ncols) / 2 + ncols] = normal_matrix + :, (matrix_width - ncols) // 2:(matrix_width - ncols) // 2 + ncols] = normal_matrix bq_normal_count_matrix = np.zeros((5, matrix_width)) bq_normal_count_matrix[ - :, (matrix_width - ncols) / 2:(matrix_width - ncols) / 2 + ncols] = bq_normal_matrix + :, (matrix_width - ncols) // 2:(matrix_width - ncols) // 2 + ncols] = bq_normal_matrix mq_normal_count_matrix = np.zeros((5, matrix_width)) mq_normal_count_matrix[ - :, (matrix_width - ncols) / 2:(matrix_width - ncols) / 2 + ncols] = mq_normal_matrix + :, (matrix_width - ncols) // 2:(matrix_width - ncols) // 2 + ncols] = mq_normal_matrix st_normal_count_matrix = np.zeros((5, matrix_width)) st_normal_count_matrix[ - :, (matrix_width - ncols) / 2:(matrix_width - ncols) / 2 + ncols] = st_normal_matrix + :, (matrix_width - ncols) // 2:(matrix_width - ncols) // 2 + ncols] = st_normal_matrix lsc_normal_count_matrix = np.zeros((5, matrix_width)) lsc_normal_count_matrix[ - :, (matrix_width - ncols) / 2:(matrix_width - ncols) / 2 + ncols] = lsc_normal_matrix + :, (matrix_width - ncols) // 2:(matrix_width - ncols) // 2 + ncols] = lsc_normal_matrix rsc_normal_count_matrix = np.zeros((5, matrix_width)) rsc_normal_count_matrix[ - :, (matrix_width - ncols) / 2:(matrix_width - ncols) / 2 + ncols] = rsc_normal_matrix + :, (matrix_width - ncols) // 2:(matrix_width - ncols) // 2 + ncols] = rsc_normal_matrix tag_normal_count_matrices = [] for iii in range(len(tag_normal_matrices)): tag_normal_count_matrices.append(np.zeros((5, matrix_width))) tag_normal_count_matrices[iii][ - :, (matrix_width - ncols) / 2:(matrix_width - ncols) / 2 + ncols] = tag_normal_matrices[iii] + :, (matrix_width - ncols) // 2:(matrix_width - ncols) // 2 + ncols] = tag_normal_matrices[iii] ref_count_matrix = np.zeros((5, matrix_width)) - ref_count_matrix[:, (matrix_width - ncols) / - 2:(matrix_width - ncols) / 2 + ncols] = ref_matrix + ref_count_matrix[:, (matrix_width - ncols) // + 2:(matrix_width - ncols) // 2 + ncols] = ref_matrix else: col_pos_map = {i: int(round(v / float(ncols) * 
matrix_width)) - for i, v in col_pos_map.iteritems()} + for i, v in col_pos_map.items()} tumor_count_matrix = imresize( tumor_matrix, (5, matrix_width)).astype(int) bq_tumor_count_matrix = imresize( @@ -568,8 +568,11 @@ def prepare_info_matrices_tabix(ref_file, tumor_count_bed, normal_count_bed, rec tag_normal_count_matrices, center, rlen, col_pos_map] -def prep_data_single_tabix((ref_file, tumor_count_bed, normal_count_bed, record, vartype, rlen, rcenter, ch_order, - matrix_base_pad, matrix_width, min_ev_frac_per_col, min_cov, ann, chrom_lengths)): +def prep_data_single_tabix(input_record): + + ref_file, tumor_count_bed, normal_count_bed, record, vartype, rlen, rcenter, ch_order, \ + matrix_base_pad, matrix_width, min_ev_frac_per_col, min_cov, ann, chrom_lengths = input_record + thread_logger = logging.getLogger( "{} ({})".format(prep_data_single_tabix.__name__, multiprocessing.current_process().name)) try: @@ -648,7 +651,7 @@ def prep_data_single_tabix((ref_file, tumor_count_bed, normal_count_bed, record, candidate_mat, 255)).astype(np.uint8) tag = "{}.{}.{}.{}.{}.{}.{}.{}.{}".format(ch_order, pos, ref[0:55], alt[ 0:55], vartype, center, rlen, tumor_cov, normal_cov) - candidate_mat = base64.b64encode(zlib.compress(candidate_mat)) + candidate_mat = base64.b64encode(zlib.compress(candidate_mat)).decode('ascii') return tag, candidate_mat, record[0:4], ann except Exception as ex: thread_logger.error(traceback.format_exc()) @@ -714,8 +717,8 @@ def push_lr(fasta_file, record, left_right_both): new_ref, new_alt] + record[4:]) record = [chrom, new_pos - len(new_ref), new_ref, new_alt] + record[4:] - eqs = map(lambda x: eqs[x], dict( - map(lambda x: ["_".join(map(str, x[1])), x[0]], enumerate(eqs))).values()) + eqs = list(map(lambda x: eqs[x], dict( + map(lambda x: ["_".join(map(str, x[1])), x[0]], enumerate(eqs))).values())) for eq in eqs: c, p, r, a = eq[0:4] @@ -731,7 +734,7 @@ def merge_records(fasta_file, records): if len(records) == 1: return records[0][0:4] chrom = records[0][0] - pos_s = map(lambda x: x[1], records) + pos_s = list(map(lambda x: x[1], records)) pos_m = min(pos_s) - 1 b = pos_m ref2_ = "" @@ -821,7 +824,8 @@ def find_len(ref, alt): return max(len(ref_), len(alt_)) -def find_records((work, split_region_file, truth_vcf_file, pred_vcf_file, ref_file, ensemble_bed, work_index)): +def find_records(input_record): + work, split_region_file, truth_vcf_file, pred_vcf_file, ref_file, ensemble_bed, work_index = input_record thread_logger = logging.getLogger( "{} ({})".format(find_records.__name__, multiprocessing.current_process().name)) try: @@ -850,12 +854,13 @@ def find_records((work, split_region_file, truth_vcf_file, pred_vcf_file, ref_fi pybedtools.BedTool(ensemble_bed).intersect( split_bed, u=True).saveas(split_ensemble_bed_file) pybedtools.BedTool(split_ensemble_bed_file).window(split_pred_vcf_file, w=5, v=True).each( - lambda x: pybedtools.Interval(x[0], int(x[1]), int(x[1]) + 1, x[3], x[4], ".", otherfields=[".", ".", ".", "."])).saveas(split_missed_ensemble_bed_file) + lambda x: pybedtools.Interval(x[0], int(x[1]), int(x[1]) + 1, x[3], x[4], + ".", otherfields=[".".encode('utf-8'), ".".encode('utf-8'), ".".encode('utf-8'), ".".encode('utf-8')])).saveas(split_missed_ensemble_bed_file) concatenate_vcfs( [split_pred_vcf_file, split_missed_ensemble_bed_file], split_pred_with_missed_file) pred_with_missed = pybedtools.BedTool(split_pred_with_missed_file).each( lambda x: pybedtools.create_interval_from_list( - [x[0], str(x[1]), ".", x[3], x[4], ".", ".", ".", ".", 
"."])).sort().saveas( + [x[0], str(x[1]), ".", x[3], x[4], ".".encode('utf-8'), ".".encode('utf-8'), ".".encode('utf-8'), ".".encode('utf-8'), ".".encode('utf-8')])).sort().saveas( split_pred_with_missed_file) not_in_ensemble_bed = pybedtools.BedTool( pred_with_missed).window(split_ensemble_bed_file, w=1, v=True) @@ -933,11 +938,11 @@ def find_records((work, split_region_file, truth_vcf_file, pred_vcf_file, ref_fi len(record_[14]) > len(record_[4]) and record_[14][0:len(record_[4])] == record_[4]): ann = record_[15:] if ann: - ann = map(float, ann) + ann = list(map(float, ann)) rrs.append([r_, ann]) max_ann = max(map(lambda x: sum(x[1]), rrs)) if max_ann > 0: - rrs = filter(lambda x: sum(x[1]) > 0, rrs) + rrs = list(filter(lambda x: sum(x[1]) > 0, rrs)) elif max_ann == 0: rrs = rrs[0:1] for r_, ann in rrs: @@ -970,7 +975,7 @@ def find_records((work, split_region_file, truth_vcf_file, pred_vcf_file, ref_fi records.append(rr + [str(i)]) i += 1 records_bed = pybedtools.BedTool(map(lambda x: pybedtools.Interval( - x[0], x[1], x[1] + len(x[2]), x[2], x[3], x[4]), records)) + x[0], x[1], x[1] + len(x[2]), x[2], x[3], x[4]), records)).saveas() truth_records = [] i = 0 @@ -984,9 +989,9 @@ def find_records((work, split_region_file, truth_vcf_file, pred_vcf_file, ref_fi i += 1 truth_bed = pybedtools.BedTool(map(lambda x: pybedtools.Interval( - x[0], x[1], x[1] + len(x[2]), x[2], x[3], x[4]), truth_records)) + x[0], x[1], x[1] + len(x[2]), x[2], x[3], x[4]), truth_records)).saveas() none_records_0 = records_bed.window(truth_bed, w=5, v=True) - none_records_ids = map(lambda x: int(x[5]), none_records_0) + none_records_ids = list(map(lambda x: int(x[5]), none_records_0)) other_records = records_bed.window(truth_bed, w=5) map_pred_2_truth = {} map_truth_2_pred = {} @@ -1008,7 +1013,7 @@ def find_records((work, split_region_file, truth_vcf_file, pred_vcf_file, ref_fi good_records = {"INS": [], "DEL": [], "SNP": []} vtype = {} record_len = {} - for i, js in map_truth_2_pred.iteritems(): + for i, js in map_truth_2_pred.items(): truth_record = truth_records[i] for j in js: record = records[j] @@ -1022,11 +1027,11 @@ def find_records((work, split_region_file, truth_vcf_file, pred_vcf_file, ref_fi good_records[vartype].append(j) vtype[j] = vartype - good_records_idx = [i for w in good_records.values() for i in w] + good_records_idx = [i for w in list(good_records.values()) for i in w] remained_idx = sorted(set(range(len(records))) - (set(good_records_idx) | set(none_records_ids))) done_js = list(good_records_idx) - for i, js in map_truth_2_pred.iteritems(): + for i, js in map_truth_2_pred.items(): truth_record = truth_records[i] if set(js) & set(good_records_idx) or set(js) & set(done_js): continue @@ -1045,7 +1050,7 @@ def find_records((work, split_region_file, truth_vcf_file, pred_vcf_file, ref_fi mt = merge_records(fasta_file, t_) if mt: mt2, eqs2 = push_lr(fasta_file, mt, 2) - eqs2 = map(lambda x: "_".join(map(str, x[0:4])), eqs2) + eqs2 = list(map(lambda x: "_".join(map(str, x[0:4])), eqs2)) for idx_jj, jj in enumerate(js): for n_merge_j in range(0, len(js) - idx_jj): r_j = js[idx_jj:idx_jj + n_merge_j + 1] @@ -1073,8 +1078,8 @@ def find_records((work, split_region_file, truth_vcf_file, pred_vcf_file, ref_fi if not set(js) - set(done_js_): done = True - perfect_idx = [i for w in good_records.values() for i in w] - good_records_idx = [i for w in good_records.values() for i in w] + perfect_idx = [i for w in list(good_records.values()) for i in w] + good_records_idx = [i for w in list(good_records.values()) 
for i in w] remained_idx = sorted(set(range(len(records))) - (set(good_records_idx) | set(none_records_ids))) for j in remained_idx: @@ -1115,10 +1120,10 @@ def find_records((work, split_region_file, truth_vcf_file, pred_vcf_file, ref_fi if done: break - good_records_idx = [i for w in good_records.values() for i in w] + good_records_idx = [i for w in list(good_records.values()) for i in w] remained_idx = sorted(set(range(len(records))) - (set(good_records_idx) | set(none_records_ids))) - for i, js in map_truth_2_pred.iteritems(): + for i, js in map_truth_2_pred.items(): truth_record = truth_records[i] if set(js) & set(good_records_idx): continue @@ -1138,10 +1143,10 @@ def find_records((work, split_region_file, truth_vcf_file, pred_vcf_file, ref_fi good_records[vartype].append(j) vtype[j] = vartype - good_records_idx = [i for w in good_records.values() for i in w] + good_records_idx = [i for w in list(good_records.values()) for i in w] remained_idx = sorted(set(range(len(records))) - (set(good_records_idx) | set(none_records_ids))) - for i, js in map_truth_2_pred.iteritems(): + for i, js in map_truth_2_pred.items(): truth_record = truth_records[i] if set(js) & set(good_records_idx): continue @@ -1159,10 +1164,10 @@ def find_records((work, split_region_file, truth_vcf_file, pred_vcf_file, ref_fi record_len[j] = find_len(ref_t, alt_t) record_center[j] = rc - good_records_idx = [i for w in good_records.values() for i in w] + good_records_idx = [i for w in list(good_records.values()) for i in w] remained_idx = sorted(set(range(len(records))) - (set(good_records_idx) | set(none_records_ids))) - for i, js in map_truth_2_pred.iteritems(): + for i, js in map_truth_2_pred.items(): truth_record = truth_records[i] if set(js) & set(good_records_idx): @@ -1184,10 +1189,10 @@ def find_records((work, split_region_file, truth_vcf_file, pred_vcf_file, ref_fi record_len[j] = find_len(ref_t, alt_t) record_center[j] = rc - good_records_idx = [i for w in good_records.values() for i in w] + good_records_idx = [i for w in list(good_records.values()) for i in w] remained_idx = sorted(set(range(len(records))) - (set(good_records_idx) | set(none_records_ids))) - for i, js in map_truth_2_pred.iteritems(): + for i, js in map_truth_2_pred.items(): truth_record = truth_records[i] if set(js) & set(good_records_idx): continue @@ -1203,16 +1208,16 @@ def find_records((work, split_region_file, truth_vcf_file, pred_vcf_file, ref_fi record_center[j] = find_i_center(ref, alt) record_len[j] = find_len(ref_t, alt_t) - good_records_idx = [i for w in good_records.values() for i in w] + good_records_idx = [i for w in list(good_records.values()) for i in w] remained_idx = sorted(set(range(len(records))) - (set(good_records_idx) | set(none_records_ids))) - for i, js in map_truth_2_pred.iteritems(): + for i, js in map_truth_2_pred.items(): truth_record = truth_records[i] if set(js) & set(good_records_idx): continue - good_records_idx = [i for w in good_records.values() for i in w] + good_records_idx = [i for w in list(good_records.values()) for i in w] remained_idx = sorted(set(range(len(records))) - (set(good_records_idx) | set(none_records_ids))) for j in remained_idx: @@ -1225,15 +1230,15 @@ def find_records((work, split_region_file, truth_vcf_file, pred_vcf_file, ref_fi vtype[j] = "NONE" record_len[j] = 0 - good_records_idx = [i for w in good_records.values() for i in w] + good_records_idx = [i for w in list(good_records.values()) for i in w] records_r = [records[x] for k, w in good_records.items() for x in w] records_r_bed = 
pybedtools.BedTool(map(lambda x: pybedtools.Interval( - x[0], x[1], x[1] + len(x[2]), x[2], x[3], x[4]), records_r)) + x[0], x[1], x[1] + len(x[2]), x[2], x[3], x[4]), records_r)).saveas() N_none = len(none_records_ids) thread_logger.info("N_none: {} ".format(N_none)) - none_records = map(lambda x: records[x], none_records_ids) + none_records = list(map(lambda x: records[x], none_records_ids)) none_records = sorted(none_records, key=lambda x: [x[0], int(x[1])]) return records_r, none_records, vtype, record_len, record_center, chroms_order, anns @@ -1259,11 +1264,11 @@ def extract_ensemble(work, ensemble_tsv): fields = line.strip().split() fields[2] = str(int(fields[1]) + len(fields[3])) ensemble_pos.append(fields[0:5]) - ensemble_data.append(map(lambda x: float( - x.replace("False", "0").replace("True", "1")), fields[5:105])) + ensemble_data.append(list(map(lambda x: float( + x.replace("False", "0").replace("True", "1")), fields[5:105]))) ensemble_data = np.array(ensemble_data) - cov_features = map(lambda x: x[0], filter(lambda x: x[1] in [ + cov_features = list(map(lambda x: x[0], filter(lambda x: x[1] in [ "Consistent_Mates", "Inconsistent_Mates", "N_DP", "nBAM_REF_NM", "nBAM_ALT_NM", "nBAM_REF_Concordant", "nBAM_REF_Discordant", "nBAM_ALT_Concordant", "nBAM_ALT_Discordant", "N_REF_FOR", "N_REF_REV", "N_ALT_FOR", "N_ALT_REV", "nBAM_REF_Clipped_Reads", "nBAM_ALT_Clipped_Reads", "nBAM_MQ0", "nBAM_Other_Reads", "nBAM_Poor_Reads", @@ -1274,48 +1279,48 @@ def extract_ensemble(work, ensemble_tsv): "tBAM_REF_Clipped_Reads", "tBAM_ALT_Clipped_Reads", "tBAM_MQ0", "tBAM_Other_Reads", "tBAM_Poor_Reads", "tBAM_REF_InDel_3bp", "tBAM_REF_InDel_2bp", "tBAM_REF_InDel_1bp", "tBAM_ALT_InDel_3bp", "tBAM_ALT_InDel_2bp", "tBAM_ALT_InDel_1bp", - ], enumerate(header))) - mq_features = map(lambda x: x[0], filter(lambda x: x[1] in [ - "nBAM_REF_MQ", "nBAM_ALT_MQ", "tBAM_REF_MQ", "tBAM_ALT_MQ"], enumerate(header))) - bq_features = map(lambda x: x[0], filter(lambda x: x[1] in [ - "nBAM_REF_BQ", "nBAM_ALT_BQ", "tBAM_REF_BQ", "tBAM_ALT_BQ"], enumerate(header))) - nm_diff_features = map(lambda x: x[0], filter( - lambda x: x[1] in ["nBAM_NM_Diff", "tBAM_NM_Diff"], enumerate(header))) - ranksum_features = map(lambda x: x[0], filter(lambda x: x[1] in ["nBAM_Z_Ranksums_MQ", "nBAM_Z_Ranksums_BQ", - "nBAM_Z_Ranksums_EndPos", "tBAM_Z_Ranksums_BQ", "tBAM_Z_Ranksums_MQ", "tBAM_Z_Ranksums_EndPos", ], enumerate(header))) - zero_to_one_features = map(lambda x: x[0], filter(lambda x: x[1] in ["if_MuTect", "if_VarScan2", "if_SomaticSniper", "if_VarDict", + ], enumerate(header)))) + mq_features = list(map(lambda x: x[0], filter(lambda x: x[1] in [ + "nBAM_REF_MQ", "nBAM_ALT_MQ", "tBAM_REF_MQ", "tBAM_ALT_MQ"], enumerate(header)))) + bq_features = list(map(lambda x: x[0], filter(lambda x: x[1] in [ + "nBAM_REF_BQ", "nBAM_ALT_BQ", "tBAM_REF_BQ", "tBAM_ALT_BQ"], enumerate(header)))) + nm_diff_features = list(map(lambda x: x[0], filter( + lambda x: x[1] in ["nBAM_NM_Diff", "tBAM_NM_Diff"], enumerate(header)))) + ranksum_features = list(map(lambda x: x[0], filter(lambda x: x[1] in ["nBAM_Z_Ranksums_MQ", "nBAM_Z_Ranksums_BQ", + "nBAM_Z_Ranksums_EndPos", "tBAM_Z_Ranksums_BQ", "tBAM_Z_Ranksums_MQ", "tBAM_Z_Ranksums_EndPos", ], enumerate(header)))) + zero_to_one_features = list(map(lambda x: x[0], filter(lambda x: x[1] in ["if_MuTect", "if_VarScan2", "if_SomaticSniper", "if_VarDict", "MuSE_Tier", "if_Strelka"] + ["nBAM_Concordance_FET", "nBAM_StrandBias_FET", "nBAM_Clipping_FET", - "tBAM_Concordance_FET", "tBAM_StrandBias_FET", 
"tBAM_Clipping_FET"] + ["if_dbsnp", "COMMON"] + ["M2_STR"], enumerate(header))) - stralka_scor = map(lambda x: x[0], filter( - lambda x: x[1] in ["Strelka_Score"], enumerate(header))) - stralka_qss = map(lambda x: x[0], filter( - lambda x: x[1] in ["Strelka_QSS"], enumerate(header))) - stralka_tqss = map(lambda x: x[0], filter( - lambda x: x[1] in ["Strelka_TQSS"], enumerate(header))) - varscan2_score = map(lambda x: x[0], filter( - lambda x: x[1] in ["VarScan2_Score"], enumerate(header))) - vardict_score = map(lambda x: x[0], filter( - lambda x: x[1] in ["VarDict_Score"], enumerate(header))) - m2_lod = map(lambda x: x[0], filter(lambda x: x[1] in [ - "M2_NLOD", "M2_TLOD"], enumerate(header))) - sniper_score = map(lambda x: x[0], filter( - lambda x: x[1] in ["Sniper_Score"], enumerate(header))) - m2_ecent = map(lambda x: x[0], filter( - lambda x: x[1] in ["M2_ECNT"], enumerate(header))) - sor = map(lambda x: x[0], filter( - lambda x: x[1] in ["SOR"], enumerate(header))) - msi = map(lambda x: x[0], filter( - lambda x: x[1] in ["MSI"], enumerate(header))) - msilen = map(lambda x: x[0], filter( - lambda x: x[1] in ["MSILEN"], enumerate(header))) - shift3 = map(lambda x: x[0], filter( - lambda x: x[1] in ["SHIFT3"], enumerate(header))) - MaxHomopolymer_Length = map(lambda x: x[0], filter( - lambda x: x[1] in ["MaxHomopolymer_Length"], enumerate(header))) - SiteHomopolymer_Length = map(lambda x: x[0], filter( - lambda x: x[1] in ["SiteHomopolymer_Length"], enumerate(header))) - InDel_Length = map(lambda x: x[0], filter( - lambda x: x[1] in ["InDel_Length"], enumerate(header))) + "tBAM_Concordance_FET", "tBAM_StrandBias_FET", "tBAM_Clipping_FET"] + ["if_dbsnp", "COMMON"] + ["M2_STR"], enumerate(header)))) + stralka_scor = list(map(lambda x: x[0], filter( + lambda x: x[1] in ["Strelka_Score"], enumerate(header)))) + stralka_qss = list(map(lambda x: x[0], filter( + lambda x: x[1] in ["Strelka_QSS"], enumerate(header)))) + stralka_tqss = list(map(lambda x: x[0], filter( + lambda x: x[1] in ["Strelka_TQSS"], enumerate(header)))) + varscan2_score = list(map(lambda x: x[0], filter( + lambda x: x[1] in ["VarScan2_Score"], enumerate(header)))) + vardict_score = list(map(lambda x: x[0], filter( + lambda x: x[1] in ["VarDict_Score"], enumerate(header)))) + m2_lod = list(map(lambda x: x[0], filter(lambda x: x[1] in [ + "M2_NLOD", "M2_TLOD"], enumerate(header)))) + sniper_score = list(map(lambda x: x[0], filter( + lambda x: x[1] in ["Sniper_Score"], enumerate(header)))) + m2_ecent = list(map(lambda x: x[0], filter( + lambda x: x[1] in ["M2_ECNT"], enumerate(header)))) + sor = list(map(lambda x: x[0], filter( + lambda x: x[1] in ["SOR"], enumerate(header)))) + msi = list(map(lambda x: x[0], filter( + lambda x: x[1] in ["MSI"], enumerate(header)))) + msilen = list(map(lambda x: x[0], filter( + lambda x: x[1] in ["MSILEN"], enumerate(header)))) + shift3 = list(map(lambda x: x[0], filter( + lambda x: x[1] in ["SHIFT3"], enumerate(header)))) + MaxHomopolymer_Length = list(map(lambda x: x[0], filter( + lambda x: x[1] in ["MaxHomopolymer_Length"], enumerate(header)))) + SiteHomopolymer_Length = list(map(lambda x: x[0], filter( + lambda x: x[1] in ["SiteHomopolymer_Length"], enumerate(header)))) + InDel_Length = list(map(lambda x: x[0], filter( + lambda x: x[1] in ["InDel_Length"], enumerate(header)))) min_max_features = [[cov_features, 0, 2 * COV], [mq_features, 0, 70], @@ -1340,7 +1345,7 @@ def extract_ensemble(work, ensemble_tsv): [InDel_Length, -30, 30], ] selected_features = sorted([i for f in min_max_features 
for i in f[0]]) - selected_features_tags = map(lambda x: header[x], selected_features) + selected_features_tags = list(map(lambda x: header[x], selected_features)) for i_s, mn, mx in min_max_features: s = ensemble_data[:, np.array(i_s)] s = np.maximum(np.minimum(s, mx), mn) @@ -1387,7 +1392,7 @@ def generate_dataset(work, truth_vcf_file, mode, tumor_pred_vcf_file, region_be len_candids += len(pybedtools.BedTool(ensemble_bed) .intersect(region_bed_file, u=True)) logger.info("len_candids: {}".format(len_candids)) - num_splits = max(len_candids / split_batch_size, num_threads) + num_splits = max(len_candids // split_batch_size, num_threads) split_region_files = split_region( work, region_bed_file, num_splits, shuffle_intervals=True) @@ -1421,8 +1426,8 @@ def generate_dataset(work, truth_vcf_file, mode, tumor_pred_vcf_file, region_be for records_r, none_records, vtype, record_len, record_center, chroms_order, anns in records_data: total_ims += len(records_r) + len(none_records) - candidates_split = int(total_ims / tsv_batch_size) + 1 - is_split = total_ims / candidates_split + candidates_split = int(total_ims // tsv_batch_size) + 1 + is_split = total_ims // candidates_split with open(var_vcf, "w") as vv, open(none_vcf, "w") as nv: is_current = 0 is_ = -1 @@ -1519,8 +1524,8 @@ def generate_dataset(work, truth_vcf_file, mode, tumor_pred_vcf_file, region_be vv.write("\t".join([record[0], str(record[1]), ".", record[2], record[ 3], ".", ".", "TAG={};".format(tag), ".", "."]) + "\n") tsv_idx.append(b_o.tell()) - b_o.write("\t".join([str(cnt_ims), "1", tag, compressed_candidate_mat] + map( - lambda x: str(np.round(x, 4)), ann)) + "\n") + b_o.write("\t".join([str(cnt_ims), "1", tag, compressed_candidate_mat] + list(map( + lambda x: str(np.round(x, 4)), ann))) + "\n") cnt_ims += 1 for x in none_records_done: if x: @@ -1528,11 +1533,11 @@ def generate_dataset(work, truth_vcf_file, mode, tumor_pred_vcf_file, region_be nv.write("\t".join([record[0], str(record[1]), ".", record[2], record[ 3], ".", ".", "TAG={};".format(tag), ".", "."]) + "\n") tsv_idx.append(b_o.tell()) - b_o.write("\t".join([str(cnt_ims), "1", tag, compressed_candidate_mat] + map( - lambda x: str(np.round(x, 4)), ann)) + "\n") + b_o.write("\t".join([str(cnt_ims), "1", tag, compressed_candidate_mat] + list(map( + lambda x: str(np.round(x, 4)), ann))) + "\n") cnt_ims += 1 tsv_idx.append(b_o.tell()) - pickle.dump(tsv_idx, open(candidates_tsv_file + ".idx", "w")) + pickle.dump(tsv_idx, open(candidates_tsv_file + ".idx", "wb")) is_current = is_end if is_current >= total_ims: break diff --git a/neusomatic/python/long_read_indelrealign.py b/neusomatic/python/long_read_indelrealign.py index 37c0c54..1f2da92 100755 --- a/neusomatic/python/long_read_indelrealign.py +++ b/neusomatic/python/long_read_indelrealign.py @@ -15,6 +15,7 @@ import logging from random import shuffle import csv +import functools import numpy as np import pybedtools @@ -125,8 +126,8 @@ def fix_record(self, record, ref_seq): bias = 0 for region_start, region_end, start_idx, end_idx, del_start, del_end, \ pos_start, pos_end, new_cigar, excess_start, excess_end in self.realignments: - c_array = np.array(map(lambda x: [x[0], x[1][1] if x[1][0] - != CIGAR_DEL else 0], enumerate(cigartuples))) + c_array = np.array(list(map(lambda x: [x[0], x[1][1] if x[1][0] + != CIGAR_DEL else 0], enumerate(cigartuples)))) c_map = np.repeat(c_array[:, 0], c_array[:, 1]) c_i = c_map[start_idx - bias] @@ -197,7 +198,7 @@ def fix_record(self, record, ref_seq): if end_hc: 
new_cigartuples_list.append([[CIGAR_HARDCLIP, end_hc]]) - new_cigartuples = reduce( + new_cigartuples = functools.reduce( lambda x, y: merge_cigartuples(x, y), new_cigartuples_list) if len(new_cigartuples) > 2 and new_cigartuples[-1][0] == CIGAR_SOFTCLIP \ and new_cigartuples[-2][0] == CIGAR_DEL: @@ -222,8 +223,8 @@ def fix_record(self, record, ref_seq): record.cigarstring = cigartuple_to_string(new_cigartuples) NM = find_NM(record, ref_seq) - record.tags = filter( - lambda x: x[0] != "NM", record.tags) + [("NM", int(NM))] + record.tags = list(filter( + lambda x: x[0] != "NM", record.tags)) + [("NM", int(NM))] return record @@ -239,8 +240,8 @@ def get_cigar_stat(cigartuple, keys=[]): def find_NM(record, ref_seq): logger = logging.getLogger(find_NM.__name__) - positions = np.array(map(lambda x: x if x else -1, - (record.get_reference_positions(full_length=True)))) + positions = np.array(list(map(lambda x: x if x else -1, + (record.get_reference_positions(full_length=True))))) sc_start = (record.cigartuples[0][0] == CIGAR_SOFTCLIP) * record.cigartuples[0][1] sc_end = (record.cigartuples[-1][0] == @@ -255,9 +256,9 @@ def find_NM(record, ref_seq): non_ins_positions = positions[non_ins] mn, mx = min(non_ins_positions), max(non_ins_positions) refseq = ref_seq.get_seq(mn, mx + 1) - ref_array = np.array(map(lambda x: NUC_to_NUM[x.upper()], list(refseq)))[ + ref_array = np.array(list(map(lambda x: NUC_to_NUM[x.upper()], list(refseq))))[ non_ins_positions - mn] - q_array = np.array(map(lambda x: NUC_to_NUM[x.upper()], list(q_seq)))[ + q_array = np.array(list(map(lambda x: NUC_to_NUM[x.upper()], list(q_seq))))[ non_ins] cigar_stat = get_cigar_stat(record.cigartuples, [CIGAR_DEL, CIGAR_INS]) assert ref_array.shape[0] == q_array.shape[0] @@ -297,13 +298,13 @@ def prepare_fasta(work, region, input_bam, ref_fasta_file, include_ref, split_i) for record in samfile.fetch(region.chrom, region.start, region.end + 1): if record.is_supplementary and "SA" in dict(record.tags): sas = dict(record.tags)["SA"].split(";") - sas = filter(None, sas) - sas_cigs = map(lambda x: x.split(",")[3], sas) + sas = list(filter(None, sas)) + sas_cigs = list(map(lambda x: x.split(",")[3], sas)) if record.cigarstring in sas_cigs: continue - positions = np.array(map(lambda x: x if x else -1, + positions = np.array(list(map(lambda x: x if x else -1, (record.get_reference_positions( - full_length=True)))) + full_length=True))))) if not record.cigartuples: continue sc_start = (record.cigartuples[0][0] == @@ -326,9 +327,9 @@ def prepare_fasta(work, region, input_bam, ref_fasta_file, include_ref, split_i) 0][0] + sc_start if max(end_idx - start_idx, pos_end - pos_start) >= (region.span() * 0.75): - c_array = np.array(map(lambda x: [x[0], x[1][1] if x[1][0] + c_array = np.array(list(map(lambda x: [x[0], x[1][1] if x[1][0] != CIGAR_DEL else 0], - enumerate(record.cigartuples))) + enumerate(record.cigartuples)))) c_map = np.repeat(c_array[:, 0], c_array[:, 1]) c_i = c_map[start_idx] c_e = c_map[end_idx] @@ -344,10 +345,10 @@ def prepare_fasta(work, region, input_bam, ref_fasta_file, include_ref, split_i) non_ins_positions = positions_[non_ins] mn, mx = min(non_ins_positions), max( non_ins_positions) - ref_array = np.array(map(lambda x: + ref_array = np.array(list(map(lambda x: NUC_to_NUM[x.upper()], - list(refseq)))[non_ins_positions - mn] - q_array = np.array(map(lambda x: NUC_to_NUM[x.upper()], list(q_seq)))[ + list(refseq))))[non_ins_positions - mn] + q_array = np.array(list(map(lambda x: NUC_to_NUM[x.upper()], list(q_seq))))[ non_ins] 
cigar_stat = get_cigar_stat( my_cigartuples, [CIGAR_DEL, CIGAR_INS]) @@ -375,13 +376,13 @@ def split_bam_to_chuncks(work, region, input_bam, chunck_size=200, for record in samfile.fetch(region.chrom, region.start, region.end + 1): if record.is_supplementary and "SA" in dict(record.tags): sas = dict(record.tags)["SA"].split(";") - sas = filter(None, sas) - sas_cigs = map(lambda x: x.split(",")[3], sas) + sas = list(filter(None, sas)) + sas_cigs = list(map(lambda x: x.split(",")[3], sas)) if record.cigarstring in sas_cigs: continue positions = np.array( - map(lambda x: x if x else -1, (record.get_reference_positions(full_length=True)))) + list(map(lambda x: x if x else -1, (record.get_reference_positions(full_length=True))))) if not record.cigartuples: continue @@ -402,16 +403,16 @@ def split_bam_to_chuncks(work, region, input_bam, chunck_size=200, records.append([record, len(q_seq)]) - records = map(lambda x: x[0], sorted(records, key=lambda x: x[1])) + records = list(map(lambda x: x[0], sorted(records, key=lambda x: x[1]))) if len(records) < chunck_size * chunck_scale: bams = [input_bam] lens = [len(records)] else: - n_splits = int(max(6, len(records) / chunck_size)) - new_chunck_size = len(records) / n_splits + n_splits = int(max(6, len(records) // chunck_size)) + new_chunck_size = len(records) // n_splits bams = [] lens = [] - n_split = (len(records) / new_chunck_size) + 1 + n_split = (len(records) // new_chunck_size) + 1 if 0 < (len(records) - ((n_split - 1) * new_chunck_size) + new_chunck_size) \ < new_chunck_size * chunck_scale: n_split -= 1 @@ -440,7 +441,7 @@ def split_bam_to_chuncks(work, region, input_bam, chunck_size=200, def read_info(info_file): logger = logging.getLogger(read_info.__name__) info = {} - with open(info_file, 'rb') as csvfile: + with open(info_file, 'r') as csvfile: spamreader = csv.reader(csvfile, delimiter='\t', quotechar='|') for row in spamreader: info[int(row[0])] = Read_Info(row[1:]) @@ -459,7 +460,7 @@ def find_cigar(alignment): raise Exception event_type = augmented_alignment[:-1][event_pos[1:]] - cigartuple = zip(event_type, event_len) + cigartuple = list(zip(event_type, event_len)) return cigartuple_to_string(cigartuple) @@ -476,10 +477,10 @@ def extract_new_cigars(region, info_file, out_fasta_file): set(map(int, records.keys())) ^ set(range(len(records))))) raise Exception - alignment = map(lambda x: x[1], sorted(map(lambda x: [int(x[0]), map(lambda x: 0 if x == "-" - else 1, x[1].seq)], + alignment = list(map(lambda x: x[1], sorted(map(lambda x: [int(x[0]), list(map(lambda x: 0 if x == "-" + else 1, x[1].seq))], records.items()), - key=lambda x: x[0])) + key=lambda x: x[0]))) ref_seq = np.array(alignment[0]) pos_ref = np.cumsum(alignment[0]) - 1 alignment = np.array(alignment[1:]) - ref_seq @@ -527,9 +528,9 @@ def nuc_to_num_convert(nuc): if nuc.upper() not in NUC_to_NUM.keys(): nuc = "-" return NUC_to_NUM[nuc.upper()] - for i, record in records.iteritems(): + for i, record in records.items(): ii = int(i) - 1 - msa[ii] = map(lambda x: nuc_to_num_convert(x), record.seq) + msa[ii] = list(map(lambda x: nuc_to_num_convert(x), record.seq)) msa = np.array(msa, dtype=int) consensus = [] for i in range(align_len): @@ -558,11 +559,11 @@ def get_final_msa(region, msa_0, consensus, out_fasta_file_1, out_fasta_file_fin raise Exception align_len = len(records["0"].seq) msa_1 = [[] for i in range(2)] - for i, record in records.iteritems(): + for i, record in records.items(): ii = int(i) - msa_1[ii] = map(lambda x: nuc_to_num_convert(x), record.seq) + msa_1[ii] = 
list(map(lambda x: nuc_to_num_convert(x), record.seq)) msa_1 = np.array(msa_1, dtype=int) - consensus_array = np.array(map(lambda x: nuc_to_num_convert(x), consensus)) + consensus_array = np.array(list(map(lambda x: nuc_to_num_convert(x), consensus))) consensus_cumsum = np.cumsum(consensus_array > 0) new_cols = np.where(msa_1[1, :] == 0)[0] new_cols -= np.arange(len(new_cols)) @@ -622,7 +623,7 @@ def merge_cigartuples(tuple1, tuple2): def find_realign_dict(realign_bed_file, chrom): logger = logging.getLogger(find_realign_dict.__name__) realign_bed = pybedtools.BedTool( - realign_bed_file).filter(lambda x: x[0] == chrom) + realign_bed_file).filter(lambda x: x[0] == chrom).saveas() realign_dict = {} chrom_regions = set([]) for interval in realign_bed: @@ -638,11 +639,12 @@ def find_realign_dict(realign_bed_file, chrom): excess_end) chrom_regions.add("{}-{}".format(start, end)) chrom_regions = sorted( - map(lambda x: map(int, x.split("-")), chrom_regions)) + map(lambda x: list(map(int, x.split("-"))), chrom_regions)) return realign_dict, chrom_regions -def correct_bam_chrom((work, input_bam, realign_bed_file, ref_fasta_file, chrom)): +def correct_bam_chrom(input_record): + work, input_bam, realign_bed_file, ref_fasta_file, chrom = input_record thread_logger = logging.getLogger( "{} ({})".format(correct_bam_chrom.__name__, multiprocessing.current_process().name)) try: @@ -729,7 +731,7 @@ def correct_bam_all(work, input_bam, output_bam, ref_fasta_file, realign_bed_fil def concatenate_sam_files(files, output, bam_header): logger = logging.getLogger(concatenate_sam_files.__name__) - good_files = filter(lambda x: x and os.path.isfile(x), files) + good_files = list(filter(lambda x: x and os.path.isfile(x), files)) fin = fileinput.input(good_files) with open(output, "w") as merged_fd: with open(bam_header) as bh_fd: @@ -831,9 +833,9 @@ def find_var(out_fasta_file, snp_min_af, del_min_af, ins_min_af, scale_maf): logger.error("sequences are missing in the alignment {}".format( set(map(int, records.keys())) ^ set(range(len(records))))) raise Exception - alignment = np.array(map(lambda x: x[1], sorted(map(lambda x: [int(x[0]), map( - lambda x: NUC_to_NUM[x.upper()], x[1].seq)], records.items()), - key=lambda x: x[0]))) + alignment = np.array(list(map(lambda x: x[1], sorted(map(lambda x: [int(x[0]), list(map( + lambda x: NUC_to_NUM[x.upper()], x[1].seq))], records.items()), + key=lambda x: x[0])))) ref_seq = alignment[0, :] counts = np.zeros((5, alignment.shape[1])) for i in range(5): @@ -897,10 +899,11 @@ def TrimREFALT(ref, alt, pos): return ref, alt, pos -def run_realignment((work, ref_fasta_file, target_region, pad, chunck_size, chunck_scale, - snp_min_af, del_min_af, ins_min_af, len_chr, input_bam, - match_score, mismatch_penalty, gap_open_penalty, gap_ext_penalty, - msa_binary, get_var)): +def run_realignment(input_record): + work, ref_fasta_file, target_region, pad, chunck_size, chunck_scale, \ + snp_min_af, del_min_af, ins_min_af, len_chr, input_bam, \ + match_score, mismatch_penalty, gap_open_penalty, gap_ext_penalty, \ + msa_binary, get_var = input_record thread_logger = logging.getLogger( "{} ({})".format(run_realignment.__name__, multiprocessing.current_process().name)) @@ -1200,8 +1203,8 @@ def long_read_indelrealign(work, input_bam, output_bam, output_vcf, region_bed_f region_bed_merged = region_bed_merged_tmp region_bed_merged.saveas(os.path.join(work, "regions_merged.bed")) - target_regions = map( - lambda x: [x[0], int(x[1]), int(x[2])], region_bed_merged) + target_regions = list(map( + 
lambda x: [x[0], int(x[1]), int(x[2])], region_bed_merged)) get_var = True if output_vcf else False pool = multiprocessing.Pool(num_threads) @@ -1227,10 +1230,10 @@ def long_read_indelrealign(work, input_bam, output_bam, output_vcf, region_bed_f if o is None: raise Exception("run_realignment failed!") - realign_entries = map(lambda x: x[0], realign_output) + realign_entries = list(map(lambda x: x[0], realign_output)) - realign_variants = map(lambda x: x[1], realign_output) - realign_variants = filter(None, realign_variants) + realign_variants = list(map(lambda x: x[1], realign_output)) + realign_variants = list(filter(None, realign_variants)) if get_var: with open(output_vcf, "w") as o_f: @@ -1254,14 +1257,13 @@ def long_read_indelrealign(work, input_bam, output_bam, output_vcf, region_bed_f pybedtools.set_tempdir(pybedtmp) if realign_entries: - realign_entries = reduce(lambda x, y: x + y, realign_entries) + realign_entries = functools.reduce(lambda x, y: x + y, realign_entries) realign_bed_file = os.path.join(work, "realign.bed") realign_entries.sort() relaign_bed = pybedtools.BedTool(map(lambda x: pybedtools.Interval(x[0], x[1], - x[2], x[ - 3], - otherfields=map(str, x[4:])), - realign_entries)).saveas(realign_bed_file) + x[2],x[3], + otherfields=list(map(lambda y:str(y).encode('utf-8'), x[4:]))), + realign_entries)).saveas(realign_bed_file) relaign_bed = pybedtools.BedTool(realign_bed_file) diff --git a/neusomatic/python/merge_tsvs.py b/neusomatic/python/merge_tsvs.py index 76609cd..9be9aef 100755 --- a/neusomatic/python/merge_tsvs.py +++ b/neusomatic/python/merge_tsvs.py @@ -48,7 +48,7 @@ def merge_tsvs(input_tsvs, out, totla_L = 0 for tsv in input_tsvs: - totla_L += len(pickle.load(open(tsv + ".idx"))) - 1 + totla_L += len(pickle.load(open(tsv + ".idx", "rb"))) - 1 totla_L = max(0, totla_L) candidates_per_tsv = max(candidates_per_tsv, np.ceil( totla_L / float(max_num_tsvs)) + 1) @@ -73,7 +73,7 @@ def merge_tsvs(input_tsvs, out, none_f.write(line) if len(none_idx) >= candidates_per_tsv: none_idx.append(none_f.tell()) - pickle.dump(none_idx, open(none_file + ".idx", "w")) + pickle.dump(none_idx, open(none_file + ".idx", "wb")) none_f.close() logger.info( "Done with merge_id: {}".format(len(merged_tsvs))) @@ -90,7 +90,7 @@ def merge_tsvs(input_tsvs, out, var_f.write(line) if len(var_idx) >= candidates_per_tsv: var_idx.append(var_f.tell()) - pickle.dump(var_idx, open(var_file + ".idx", "w")) + pickle.dump(var_idx, open(var_file + ".idx", "wb")) var_f.close() logger.info( "Done with merge_id: {}".format(len(merged_tsvs))) @@ -102,13 +102,13 @@ def merge_tsvs(input_tsvs, out, var_idx = [] if not var_f.closed: var_idx.append(var_f.tell()) - pickle.dump(var_idx, open(var_file + ".idx", "w")) + pickle.dump(var_idx, open(var_file + ".idx", "wb")) var_f.close() logger.info("Done with merge_id: {}".format(len(merged_tsvs))) merged_tsvs.append(var_file) if not keep_none_types and not none_f.closed: none_idx.append(none_f.tell()) - pickle.dump(none_idx, open(none_file + ".idx", "w")) + pickle.dump(none_idx, open(none_file + ".idx", "wb")) none_f.close() logger.info("Done with merge_id: {}".format(len(merged_tsvs))) merged_tsvs.append(none_file) diff --git a/neusomatic/python/network.py b/neusomatic/python/network.py index 35ad45b..da685b7 100755 --- a/neusomatic/python/network.py +++ b/neusomatic/python/network.py @@ -20,13 +20,13 @@ def __init__(self, dim, ks_1=3, ks_2=3, dl_1=1, dl_2=1, mp_ks=3, mp_st=1): super(NSBlock, self).__init__() self.dim = dim self.conv_r1 = nn.Conv2d( - dim, dim, 
kernel_size=ks_1, dilation=dl_1, padding=(dl_1 * (ks_1 - 1)) / 2) + dim, dim, kernel_size=ks_1, dilation=dl_1, padding=(dl_1 * (ks_1 - 1)) // 2) self.bn_r1 = nn.BatchNorm2d(dim) self.conv_r2 = nn.Conv2d( - dim, dim, kernel_size=ks_2, dilation=dl_2, padding=(dl_2 * (ks_2 - 1)) / 2) + dim, dim, kernel_size=ks_2, dilation=dl_2, padding=(dl_2 * (ks_2 - 1)) // 2) self.bn_r2 = nn.BatchNorm2d(dim) self.pool_r2 = nn.MaxPool2d((1, mp_ks), padding=( - 0, (mp_ks - 1) / 2), stride=(1, mp_st)) + 0, (mp_ks - 1) // 2), stride=(1, mp_st)) def forward(self, x): y1 = (F.relu(self.bn_r1(self.conv_r1(x)))) @@ -56,8 +56,8 @@ def __init__(self, num_channels): rb = NSBlock(dim, ks_1, ks_2, dl_1, dl_2, mp_ks, mp_st) res_layers.append(rb) self.res_layers = nn.Sequential(*res_layers) - ds = np.prod(map(lambda x: x[5], self.nsblocks)) - self.fc_dim = dim * 32 * 5 / ds + ds = np.prod(list(map(lambda x: x[5], self.nsblocks))) + self.fc_dim = dim * 32 * 5 // ds self.fc1 = nn.Linear(self.fc_dim, 240) self.fc2 = nn.Linear(240, 4) self.fc3 = nn.Linear(240, 1) diff --git a/neusomatic/python/postprocess.py b/neusomatic/python/postprocess.py index 878b70f..87834f2 100755 --- a/neusomatic/python/postprocess.py +++ b/neusomatic/python/postprocess.py @@ -86,7 +86,7 @@ def add_vcf_info(work, reference, merged_vcf, candidates_vcf, ensemble_tsv, if tag not in tags_info: tags_info[tag] = [] info = x[19].split(":") - dp, ro, ao = map(int, info[1:4]) + dp, ro, ao = list(map(int, info[1:4])) af = float(info[4]) is_same = x[1] == x[11] and x[3] == x[13] and x[4] == x[14] is_same_type = np.sign( @@ -96,7 +96,7 @@ def add_vcf_info(work, reference, merged_vcf, candidates_vcf, ensemble_tsv, tags_info[tag].append( [~is_same, ~is_same_type, dist, len_diff, s_e, dp, ro, ao, af]) fina_info_tag = {} - for tag, hits in tags_info.iteritems(): + for tag, hits in tags_info.items(): hits = sorted(hits, key=lambda x: x[0:5]) fina_info_tag[tag] = hits[0][5:] @@ -105,8 +105,8 @@ def add_vcf_info(work, reference, merged_vcf, candidates_vcf, ensemble_tsv, fina_info_tag[tag] = [0, 0, 0, 0] scores[tag] = [x[5], x[6], x[7], x[9]] - tags = sorted(fina_info_tag.keys(), key=lambda x: map(int, x.split("-")[0:2] - )) + tags = sorted(fina_info_tag.keys(), key=lambda x: list(map(int, x.split("-")[0:2] + ))) with open(output_vcf, "w") as o_f: o_f.write("##fileformat=VCFv4.2\n") o_f.write("##NeuSomatic Version={}\n".format(__version__)) diff --git a/neusomatic/python/preprocess.py b/neusomatic/python/preprocess.py index 40d297b..ca1dd25 100755 --- a/neusomatic/python/preprocess.py +++ b/neusomatic/python/preprocess.py @@ -22,7 +22,8 @@ from utils import concatenate_vcfs -def split_dbsnp((restart, dbsnp, region_bed, dbsnp_region_vcf)): +def split_dbsnp(record): + restart, dbsnp, region_bed, dbsnp_region_vcf = record thread_logger = logging.getLogger( "{} ({})".format(split_dbsnp.__name__, multiprocessing.current_process().name)) try: @@ -115,7 +116,7 @@ def process_split_region(tn, work, region, reference, mode, alignment_bam, dbsnp dbsnp_regions.append(dbsnp_region_vcf) else: filtered_candidates_vcfs = None - return map(lambda x: x[1], scan_outputs), map(lambda x: x[2], scan_outputs), filtered_candidates_vcfs, dbsnp_regions + return list(map(lambda x: x[1], scan_outputs)), list(map(lambda x: x[2], scan_outputs)), filtered_candidates_vcfs, dbsnp_regions def generate_dataset_region(work, truth_vcf, mode, filtered_candidates_vcf, region, tumor_count_bed, normal_count_bed, reference, @@ -126,7 +127,8 @@ def generate_dataset_region(work, truth_vcf, mode, 
filtered_candidates_vcf, regi tsv_batch_size) -def get_ensemble_region((reference, ensemble_bed, region, ensemble_bed_region_file, matrix_base_pad)): +def get_ensemble_region(record): + reference, ensemble_bed, region, ensemble_bed_region_file, matrix_base_pad = record thread_logger = logging.getLogger( "{} ({})".format(get_ensemble_region.__name__, multiprocessing.current_process().name)) try: diff --git a/neusomatic/python/resolve_scores.py b/neusomatic/python/resolve_scores.py index 297e02a..a2897c6 100755 --- a/neusomatic/python/resolve_scores.py +++ b/neusomatic/python/resolve_scores.py @@ -35,7 +35,7 @@ def resolve_scores(input_bam, ra_vcf, target_vcf, output_vcf): intervals_dict[id_] = [] intervals_dict[id_].append(interval) - for id_, intervals in intervals_dict.iteritems(): + for id_, intervals in intervals_dict.items(): if len(intervals) == 1: score = intervals[0][15] interval = intervals[0][:10] @@ -45,15 +45,15 @@ def resolve_scores(input_bam, ra_vcf, target_vcf, output_vcf): else: len_ = (len(intervals[0][4]) - len(intervals[0][3])) pos_ = int(intervals[0][1]) - len_diff = map(lambda x: abs( - (len(x[14]) - len(x[13])) - len_), intervals) + len_diff = list(map(lambda x: abs( + (len(x[14]) - len(x[13])) - len_), intervals)) min_len_diff = min(len_diff) - intervals = filter(lambda x: abs( - (len(x[14]) - len(x[13])) - len_) == min_len_diff, intervals) - pos_diff = map(lambda x: abs(int(x[11]) - pos_), intervals) + intervals = list(filter(lambda x: abs( + (len(x[14]) - len(x[13])) - len_) == min_len_diff, intervals)) + pos_diff = list(map(lambda x: abs(int(x[11]) - pos_), intervals)) min_pos_diff = min(pos_diff) - intervals = filter(lambda x: abs( - int(x[11]) - pos_) == min_pos_diff, intervals) + intervals = list(filter(lambda x: abs( + int(x[11]) - pos_) == min_pos_diff, intervals)) score = "{:.4f}".format( np.round(max(map(lambda x: float(x[15]), intervals)), 4)) interval = intervals[0][:10] diff --git a/neusomatic/python/resolve_variants.py b/neusomatic/python/resolve_variants.py index 20332fe..39181d8 100755 --- a/neusomatic/python/resolve_variants.py +++ b/neusomatic/python/resolve_variants.py @@ -70,19 +70,20 @@ def extract_ins(record): return inss -def find_resolved_variants((chrom, start, end, variants, input_bam, reference)): +def find_resolved_variants(input_record): + chrom, start, end, variants, input_bam, reference = input_record thread_logger = logging.getLogger( "{} ({})".format(find_resolved_variants.__name__, multiprocessing.current_process().name)) try: ref = pysam.FastaFile(reference) out_variants = [] - start, end = map(int, [start, end]) + start, end = list(map(int, [start, end])) region = [chrom, start, end] - vartypes = map(lambda x: x[-1], variants) - scores = map(lambda x: x[5], variants) + vartypes = list(map(lambda x: x[-1], variants)) + scores = list(map(lambda x: x[5], variants)) if len(set(vartypes)) > 1: out_variants.extend( - map(lambda x: [x[0], int(x[1]), x[3], x[4], x[10], x[5]], variants)) + list(map(lambda x: [x[0], int(x[1]), x[3], x[4], x[10], x[5]], variants))) else: vartype = vartypes[0] score = max(scores) @@ -93,13 +94,13 @@ def find_resolved_variants((chrom, start, end, variants, input_bam, reference)): for record in samfile.fetch(chrom, start, end): if record.cigarstring and "D" in record.cigarstring: dels.extend(extract_del(record)) - dels = filter(lambda x: ( - start <= x[1] <= end) or start <= x[2] <= end, dels) + dels = list(filter(lambda x: ( + start <= x[1] <= end) or start <= x[2] <= end, dels)) if dels: - intervals = 
map(lambda x: pybedtools.Interval( - x[0], x[1], x[2]), dels) - bed = pybedtools.BedTool(intervals) - del_strs = map(lambda x: "---".join(x[0:3]), bed) + intervals = list(map(lambda x: pybedtools.Interval( + x[0], x[1], x[2]), dels)) + bed = pybedtools.BedTool(intervals).saveas() + del_strs = list(map(lambda x: "---".join(x[0:3]), bed)) uniq_dels = list(set(del_strs)) uniq_dels_count = {} for del_ in uniq_dels: @@ -109,10 +110,10 @@ def find_resolved_variants((chrom, start, end, variants, input_bam, reference)): if uniq_dels_count[del_] <= max_count * 0.5: del uniq_dels_count[del_] new_bed = pybedtools.BedTool(map(lambda x: pybedtools.Interval(x[0], int(x[1]), int(x[2])), - map(lambda x: x.split("---"), uniq_dels_count.keys()))) + list(map(lambda x: x.split("---"), uniq_dels_count.keys())))).saveas() new_bed = new_bed.sort().merge(c=[1], o="count") - out_variants.extend(map(lambda x: [x[0], int(x[1]), ref.fetch(x[0], int( - x[1]) - 1, int(x[2])), ref.fetch(x[0], int(x[1]) - 1, int(x[1])), "0/1", score], new_bed)) + out_variants.extend(list(map(lambda x: [x[0], int(x[1]), ref.fetch(x[0], int( + x[1]) - 1, int(x[2])), ref.fetch(x[0], int(x[1]) - 1, int(x[1])), "0/1", score], new_bed))) elif vartype == "INS": intervals = [] inss = [] @@ -120,13 +121,13 @@ def find_resolved_variants((chrom, start, end, variants, input_bam, reference)): for record in samfile.fetch(chrom, start, end): if record.cigarstring and "I" in record.cigarstring: inss.extend(extract_ins(record)) - inss = filter(lambda x: ( - start <= x[1] <= end) or start <= x[2] <= end, inss) + inss = list(filter(lambda x: ( + start <= x[1] <= end) or start <= x[2] <= end, inss)) if inss: - intervals = map(lambda x: pybedtools.Interval( - x[0], x[1], x[2], x[3]), inss) - bed = pybedtools.BedTool(intervals) - ins_strs = map(lambda x: "---".join(x[0:4]), bed) + intervals = list(map(lambda x: pybedtools.Interval( + x[0], x[1], x[2], x[3]), inss)) + bed = pybedtools.BedTool(intervals).saveas() + ins_strs = list(map(lambda x: "---".join(x[0:4]), bed)) uniq_inss = list(set(ins_strs)) uniq_inss_count = {} for ins_ in uniq_inss: @@ -138,9 +139,9 @@ def find_resolved_variants((chrom, start, end, variants, input_bam, reference)): if uniq_inss_count[ins_] <= max_count * 0.5 or 0 < abs(int(ins_.split("---")[1]) - max_pos) < 4: del uniq_inss_count[ins_] new_bed = pybedtools.BedTool(map(lambda x: pybedtools.Interval(x[0], int(x[1]), int(x[2]), x[3]), - map(lambda x: x.split("---"), uniq_inss_count.keys()))).sort() - out_variants.extend(map(lambda x: [x[0], int(x[1]), ref.fetch(x[0], int( - x[1]) - 1, int(x[1])), ref.fetch(x[0], int(x[1]) - 1, int(x[1])) + x[3], "0/1", score], new_bed)) + list(map(lambda x: x.split("---"), uniq_inss_count.keys())))).sort() + out_variants.extend(list(map(lambda x: [x[0], int(x[1]), ref.fetch(x[0], int( + x[1]) - 1, int(x[1])), ref.fetch(x[0], int(x[1]) - 1, int(x[1])) + x[3], "0/1", score], new_bed))) return out_variants except Exception as ex: thread_logger.error(traceback.format_exc()) diff --git a/neusomatic/python/scan_alignments.py b/neusomatic/python/scan_alignments.py index c25e91f..820adb4 100755 --- a/neusomatic/python/scan_alignments.py +++ b/neusomatic/python/scan_alignments.py @@ -24,8 +24,10 @@ from split_bed import split_region -def run_scan_alignments((work, reference, scan_alignments_binary, split_region_file, - input_bam, window_size, maf, min_mapq, max_dp, calc_qual, num_threads)): +def run_scan_alignments(record): + work, reference, scan_alignments_binary, split_region_file, \ + input_bam, 
window_size, maf, min_mapq, max_dp, calc_qual, num_threads = record + thread_logger = logging.getLogger( "{} ({})".format(run_scan_alignments.__name__, multiprocessing.current_process().name)) try: @@ -80,7 +82,7 @@ def scan_alignments(work, scan_alignments_binary, input_bam, with pysam.AlignmentFile(input_bam, "rb") as samfile: for chrom, length in zip(samfile.references, samfile.lengths): intervals.append(pybedtools.Interval(chrom, 1, length - 1)) - regions_bed = pybedtools.BedTool(intervals) + regions_bed = pybedtools.BedTool(intervals).saveas() if not os.path.exists(work): os.mkdir(work) total_len = sum(map(lambda x: int(x[2]) - int(x[1]) + 1, regions_bed)) @@ -98,7 +100,7 @@ def scan_alignments(work, scan_alignments_binary, input_bam, regions_bed_file = os.path.join(work, "all_regions.bed") regions_bed.saveas(regions_bed_file) - num_split = max(int(np.ceil((total_len / 10000000) / + num_split = max(int(np.ceil((total_len // 10000000) // num_threads) * num_threads), num_threads) split_region_files = split_region(work, regions_bed_file, num_split, min_region=window_size, max_region=1e20) diff --git a/neusomatic/python/split_bed.py b/neusomatic/python/split_bed.py index 78f620b..f4d91ff 100755 --- a/neusomatic/python/split_bed.py +++ b/neusomatic/python/split_bed.py @@ -30,10 +30,10 @@ def split_region(work, region_bed_file, num_splits, max_region=1000000, min_regi intervals.append(region) if shuffle_intervals: shuffle(intervals) - regions_bed = pybedtools.BedTool(intervals) + regions_bed = pybedtools.BedTool(intervals).saveas() total_len = sum(map(lambda x: int(x[2]) - int(x[1]) + 1, regions_bed)) logger.info("Total length: {}".format(total_len)) - split_len = total_len / num_splits + split_len = total_len // num_splits split_regions = [] current_regions = [] sofar_len = 0 diff --git a/neusomatic/python/train.py b/neusomatic/python/train.py index 1d67d02..d7a06e9 100755 --- a/neusomatic/python/train.py +++ b/neusomatic/python/train.py @@ -41,21 +41,21 @@ def make_weights_for_balanced_classes(count_class_t, count_class_l, nclasses_t, count_class_l[0] = none_count logger.info("count type classes: {}".format( - zip(vartype_classes, count_class_t))) + list(zip(vartype_classes, count_class_t)))) N = float(sum(count_class_t)) for i in range(nclasses_t): w_t[i] = (1 - (float(count_class_t[i]) / float(N))) / float(nclasses_t) w_t = np.array(w_t) - logger.info("weight type classes: {}".format(zip(vartype_classes, w_t))) + logger.info("weight type classes: {}".format(list(zip(vartype_classes, w_t)))) - logger.info("count length classes: {}".format( - zip(range(nclasses_l), count_class_l))) + logger.info("count length classes: {}".format(list( + zip(range(nclasses_l), count_class_l)))) N = float(sum(count_class_l)) for i in range(nclasses_l): w_l[i] = (1 - (float(count_class_l[i]) / float(N))) / float(nclasses_l) w_l = np.array(w_l) - logger.info("weight length classes: {}".format( - zip(range(nclasses_l), w_l))) + logger.info("weight length classes: {}".format(list( + zip(range(nclasses_l), w_l)))) return w_t, w_l @@ -154,8 +154,8 @@ def __iter__(self): logger = logging.getLogger(SubsetNoneSampler.__iter__.__name__) if self.current_none_id > (len(self.none_indices) - self.none_count): this_round_nones = self.none_indices[self.current_none_id:] - self.none_indices = map(lambda i: self.none_indices[i], - torch.randperm(len(self.none_indices)).tolist()) + self.none_indices = list(map(lambda i: self.none_indices[i], + torch.randperm(len(self.none_indices)).tolist())) self.current_none_id = 
self.none_count - len(this_round_nones) this_round_nones += self.none_indices[0:self.current_none_id] else: @@ -221,10 +221,10 @@ def train_neusomatic(candidates_tsv, validation_candidates_tsv, out_dir, checkpo # 1. filter out unnecessary keys # pretrained_state_dict = { # k: v for k, v in pretrained_state_dict.items() if k in model_dict} - if "module." in pretrained_state_dict.keys()[0] and "module." not in model_dict.keys()[0]: + if "module." in list(pretrained_state_dict.keys())[0] and "module." not in list(model_dict.keys())[0]: pretrained_state_dict = {k.split("module.")[1]: v for k, v in pretrained_state_dict.items( ) if k.split("module.")[1] in model_dict} - elif "module." not in pretrained_state_dict.keys()[0] and "module." in model_dict.keys()[0]: + elif "module." not in list(pretrained_state_dict.keys())[0] and "module." in list(model_dict.keys())[0]: pretrained_state_dict = { ("module." + k): v for k, v in pretrained_state_dict.items() if ("module." + k) in model_dict} else: @@ -251,11 +251,11 @@ def train_neusomatic(candidates_tsv, validation_candidates_tsv, out_dir, checkpo Ls = [] for tsv in candidates_tsv: - idx = pickle.load(open(tsv + ".idx")) + idx = pickle.load(open(tsv + ".idx", "rb")) Ls.append(len(idx) - 1) - Ls, candidates_tsv = zip( - *sorted(zip(Ls, candidates_tsv), key=lambda x: x[0], reverse=True)) + Ls, candidates_tsv = list(zip( + *sorted(zip(Ls, candidates_tsv), key=lambda x: x[0], reverse=True))) train_split_tsvs = [] current_L = 0 @@ -287,13 +287,13 @@ def train_neusomatic(candidates_tsv, validation_candidates_tsv, out_dir, checkpo none_indices = train_set.get_none_indices() var_indices = train_set.get_var_indices() if none_indices: - none_indices = map(lambda i: none_indices[i], - torch.randperm(len(none_indices)).tolist()) + none_indices = list(map(lambda i: none_indices[i], + torch.randperm(len(none_indices)).tolist())) logger.info( "Non-somatic candidates is split {}: {}".format(split_i, len(none_indices))) if var_indices: - var_indices = map(lambda i: var_indices[i], - torch.randperm(len(var_indices)).tolist()) + var_indices = list(map(lambda i: var_indices[i], + torch.randperm(len(var_indices)).tolist())) logger.info("Somatic candidates in split {}: {}".format( split_i, len(var_indices))) none_count = max(min(len(none_indices), len( diff --git a/neusomatic/python/utils.py b/neusomatic/python/utils.py index c09e543..b8eb237 100755 --- a/neusomatic/python/utils.py +++ b/neusomatic/python/utils.py @@ -48,8 +48,8 @@ def concatenate_files(infiles, outfile, check_file_existence=True): def concatenate_vcfs(infiles, outfile, check_file_existence=True, header_string="#"): with open(outfile, "w") as out_fd: # Only keep files which exist - files_to_process = filter(lambda f: f and ( - not check_file_existence or os.path.isfile(f)), infiles) + files_to_process = list(filter(lambda f: f and ( + not check_file_existence or os.path.isfile(f)), infiles)) for index, infile in enumerate(files_to_process): with open(infile) as in_fd: diff --git a/test/NeuSomatic_ensemble.vcf b/test/NeuSomatic_ensemble.vcf index 72ff1b7..a00b347 100644 --- a/test/NeuSomatic_ensemble.vcf +++ b/test/NeuSomatic_ensemble.vcf @@ -11,10 +11,10 @@ ##FORMAT= #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE 22 21330787 . C T 26.9905 PASS SCORE=0.9980;DP=320;RO=231;AO=87;AF=0.2736; GT:DP:RO:AO:AF 0/1:320:231:87:0.2736 -22 21332122 . G A 40.0000 PASS SCORE=0.9999;DP=210;RO=152;AO=58;AF=0.2762; GT:DP:RO:AO:AF 0/1:210:152:58:0.2762 +22 21332122 . 
G A 39.9993 PASS SCORE=0.9999;DP=210;RO=152;AO=58;AF=0.2762; GT:DP:RO:AO:AF 0/1:210:152:58:0.2762 22 21334924 . G C 22.2248 PASS SCORE=0.9940;DP=78;RO=58;AO=20;AF=0.2564; GT:DP:RO:AO:AF 0/1:78:58:20:0.2564 -22 21335259 . C A 29.2086 PASS SCORE=0.9988;DP=205;RO=158;AO=47;AF=0.2293; GT:DP:RO:AO:AF 0/1:205:158:47:0.2293 +22 21335259 . C A 29.2085 PASS SCORE=0.9988;DP=205;RO=158;AO=47;AF=0.2293; GT:DP:RO:AO:AF 0/1:205:158:47:0.2293 22 21384516 . C T 33.0106 PASS SCORE=0.9995;DP=78;RO=51;AO=27;AF=0.3462; GT:DP:RO:AO:AF 0/1:78:51:27:0.3462 -22 21982892 . C T 25.6891 PASS SCORE=0.9973;DP=131;RO=86;AO=45;AF=0.3435; GT:DP:RO:AO:AF 0/1:131:86:45:0.3435 -22 21983260 . A G 26.5777 PASS SCORE=0.9978;DP=85;RO=41;AO=44;AF=0.5176; GT:DP:RO:AO:AF 0/1:85:41:44:0.5176 -22 21989959 . AAG A 36.9899 PASS SCORE=0.9998;DP=100;RO=69;AO=29;AF=0.2959; GT:DP:RO:AO:AF 0/1:100:69:29:0.2959 +22 21982892 . C T 25.6892 PASS SCORE=0.9973;DP=131;RO=86;AO=45;AF=0.3435; GT:DP:RO:AO:AF 0/1:131:86:45:0.3435 +22 21983260 . A G 26.5776 PASS SCORE=0.9978;DP=85;RO=41;AO=44;AF=0.5176; GT:DP:RO:AO:AF 0/1:85:41:44:0.5176 +22 21989959 . AAG A 36.9896 PASS SCORE=0.9998;DP=100;RO=69;AO=29;AF=0.2959; GT:DP:RO:AO:AF 0/1:100:69:29:0.2959 diff --git a/test/NeuSomatic_standalone.vcf b/test/NeuSomatic_standalone.vcf index 3288f06..20b54fd 100644 --- a/test/NeuSomatic_standalone.vcf +++ b/test/NeuSomatic_standalone.vcf @@ -10,11 +10,11 @@ ##FORMAT= ##FORMAT= #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE -22 21330787 . C T 33.9797 PASS SCORE=0.9996;DP=320;RO=231;AO=87;AF=0.2736; GT:DP:RO:AO:AF 0/1:320:231:87:0.2736 -22 21332122 . G A 36.9897 PASS SCORE=0.9998;DP=210;RO=152;AO=58;AF=0.2762; GT:DP:RO:AO:AF 0/1:210:152:58:0.2762 +22 21330787 . C T 33.9800 PASS SCORE=0.9996;DP=320;RO=231;AO=87;AF=0.2736; GT:DP:RO:AO:AF 0/1:320:231:87:0.2736 +22 21332122 . G A 36.9903 PASS SCORE=0.9998;DP=210;RO=152;AO=58;AF=0.2762; GT:DP:RO:AO:AF 0/1:210:152:58:0.2762 22 21334924 . G C 4.5474 LowQual SCORE=0.6490;DP=78;RO=58;AO=20;AF=0.2564; GT:DP:RO:AO:AF 0/1:78:58:20:0.2564 22 21335259 . C A 25.8517 PASS SCORE=0.9974;DP=205;RO=158;AO=47;AF=0.2293; GT:DP:RO:AO:AF 0/1:205:158:47:0.2293 22 21384516 . C T 8.2334 PASS SCORE=0.8498;DP=78;RO=51;AO=27;AF=0.3462; GT:DP:RO:AO:AF 0/1:78:51:27:0.3462 -22 21982892 . C T 30.0011 PASS SCORE=0.9990;DP=131;RO=86;AO=45;AF=0.3435; GT:DP:RO:AO:AF 0/1:131:86:45:0.3435 -22 21983260 . A G 31.5496 PASS SCORE=0.9993;DP=85;RO=41;AO=44;AF=0.5176; GT:DP:RO:AO:AF 0/1:85:41:44:0.5176 -22 21989959 . AAG A 40.0000 PASS SCORE=0.9999;DP=100;RO=69;AO=29;AF=0.2959; GT:DP:RO:AO:AF 0/1:100:69:29:0.2959 +22 21982892 . C T 30.0008 PASS SCORE=0.9990;DP=131;RO=86;AO=45;AF=0.3435; GT:DP:RO:AO:AF 0/1:131:86:45:0.3435 +22 21983260 . A G 31.5498 PASS SCORE=0.9993;DP=85;RO=41;AO=44;AF=0.5176; GT:DP:RO:AO:AF 0/1:85:41:44:0.5176 +22 21989959 . 
AAG A 39.9993 PASS SCORE=0.9999;DP=100;RO=69;AO=29;AF=0.2959; GT:DP:RO:AO:AF 0/1:100:69:29:0.2959 From 8269698004eb8ed98437832e872d012912ac6804 Mon Sep 17 00:00:00 2001 From: Sahraeian Date: Thu, 31 Jan 2019 14:34:53 -0800 Subject: [PATCH 3/7] few fixes --- .gitignore | 1 + neusomatic/python/long_read_indelrealign.py | 46 ++++++++++----------- neusomatic/python/postprocess.py | 10 ++--- neusomatic/python/utils.py | 4 +- 4 files changed, 31 insertions(+), 30 deletions(-) diff --git a/.gitignore b/.gitignore index 8ec2c20..111da70 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,7 @@ third_party/* !third_party/*.cmake neusomatic/build/ neusomatic/bin/* +neusomatic/python/__pycache__/ others/ neusomatic/python/*.pyc test/example/ diff --git a/neusomatic/python/long_read_indelrealign.py b/neusomatic/python/long_read_indelrealign.py index 1f2da92..7b4ee36 100755 --- a/neusomatic/python/long_read_indelrealign.py +++ b/neusomatic/python/long_read_indelrealign.py @@ -368,9 +368,9 @@ def prepare_fasta(work, region, input_bam, ref_fasta_file, include_ref, split_i) return in_fasta_file, info_file -def split_bam_to_chuncks(work, region, input_bam, chunck_size=200, - chunck_scale=1.5): - logger = logging.getLogger(split_bam_to_chuncks.__name__) +def split_bam_to_chunks(work, region, input_bam, chunk_size=200, + chunk_scale=1.5): + logger = logging.getLogger(split_bam_to_chunks.__name__) records = [] with pysam.Samfile(input_bam, "rb") as samfile: for record in samfile.fetch(region.chrom, region.start, region.end + 1): @@ -404,22 +404,22 @@ def split_bam_to_chuncks(work, region, input_bam, chunck_size=200, records.append([record, len(q_seq)]) records = list(map(lambda x: x[0], sorted(records, key=lambda x: x[1]))) - if len(records) < chunck_size * chunck_scale: + if len(records) < chunk_size * chunk_scale: bams = [input_bam] lens = [len(records)] else: - n_splits = int(max(6, len(records) // chunck_size)) - new_chunck_size = len(records) // n_splits + n_splits = int(max(6, len(records) // chunk_size)) + new_chunk_size = len(records) // n_splits bams = [] lens = [] - n_split = (len(records) // new_chunck_size) + 1 - if 0 < (len(records) - ((n_split - 1) * new_chunck_size) + new_chunck_size) \ - < new_chunck_size * chunck_scale: + n_split = (len(records) // new_chunk_size) + 1 + if 0 < (len(records) - ((n_split - 1) * new_chunk_size) + new_chunk_size) \ + < new_chunk_size * chunk_scale: n_split -= 1 for i in range(n_split): - i_start = i * new_chunck_size + i_start = i * new_chunk_size i_end = (i + 1) * \ - new_chunck_size if i < (n_split - 1) else len(records) + new_chunk_size if i < (n_split - 1) else len(records) split_input_bam = os.path.join( work, region.__str__() + "_split_{}.bam".format(i)) with pysam.AlignmentFile(input_bam, "rb") as samfile: @@ -442,8 +442,8 @@ def read_info(info_file): logger = logging.getLogger(read_info.__name__) info = {} with open(info_file, 'r') as csvfile: - spamreader = csv.reader(csvfile, delimiter='\t', quotechar='|') - for row in spamreader: + csvreader = csv.reader(csvfile, delimiter='\t', quotechar='|') + for row in csvreader: info[int(row[0])] = Read_Info(row[1:]) return info @@ -900,7 +900,7 @@ def TrimREFALT(ref, alt, pos): def run_realignment(input_record): - work, ref_fasta_file, target_region, pad, chunck_size, chunck_scale, \ + work, ref_fasta_file, target_region, pad, chunk_size, chunk_scale, \ snp_min_af, del_min_af, ins_min_af, len_chr, input_bam, \ match_score, mismatch_penalty, gap_open_penalty, gap_ext_penalty, \ msa_binary, get_var = input_record @@ 
-918,8 +918,8 @@ def run_realignment(input_record): pybedtools.set_tempdir(pybedtmp) variant = [] all_entries = [] - input_bam_splits, lens_splits = split_bam_to_chuncks( - work, region, input_bam, chunck_size, chunck_scale) + input_bam_splits, lens_splits = split_bam_to_chunks( + work, region, input_bam, chunk_size, chunk_scale) new_seqs = [] new_ref_seq = "" skipped = 0 @@ -1163,7 +1163,7 @@ def extend_regions_repeat(region_bed_file, extended_region_bed_file, ref_fasta_f def long_read_indelrealign(work, input_bam, output_bam, output_vcf, region_bed_file, ref_fasta_file, num_threads, pad, - chunck_size, chunck_scale, snp_min_af, del_min_af, ins_min_af, + chunk_size, chunk_scale, snp_min_af, del_min_af, ins_min_af, match_score, mismatch_penalty, gap_open_penalty, gap_ext_penalty, msa_binary): logger = logging.getLogger(long_read_indelrealign.__name__) @@ -1210,8 +1210,8 @@ def long_read_indelrealign(work, input_bam, output_bam, output_vcf, region_bed_f pool = multiprocessing.Pool(num_threads) map_args = [] for target_region in target_regions: - map_args.append((work, ref_fasta_file, target_region, pad, chunck_size, - chunck_scale, snp_min_af, del_min_af, ins_min_af, + map_args.append((work, ref_fasta_file, target_region, pad, chunk_size, + chunk_scale, snp_min_af, del_min_af, ins_min_af, chrom_lengths[target_region[0]], input_bam, match_score, mismatch_penalty, gap_open_penalty, gap_ext_penalty, msa_binary, get_var)) @@ -1299,9 +1299,9 @@ def long_read_indelrealign(work, input_bam, output_bam, output_vcf, region_bed_f help='number of threads', default=1) parser.add_argument('--pad', type=int, help='#base padding to the regions', default=1) - parser.add_argument('--chunck_size', type=int, + parser.add_argument('--chunk_size', type=int, help='chunk split size for high depth', default=600) - parser.add_argument('--chunck_scale', type=float, + parser.add_argument('--chunk_scale', type=float, help='chunk scale size for high depth', default=1.5) parser.add_argument('--snp_min_af', type=float, help='SNP min allele freq', default=0.05) @@ -1325,8 +1325,8 @@ def long_read_indelrealign(work, input_bam, output_bam, output_vcf, region_bed_f try: processor = long_read_indelrealign(args.work, args.input_bam, args.output_bam, args.output_vcf, args.region_bed, args.reference, - args.num_threads, args.pad, args.chunck_size, - args.chunck_scale, args.snp_min_af, args.del_min_af, + args.num_threads, args.pad, args.chunk_size, + args.chunk_scale, args.snp_min_af, args.del_min_af, args.ins_min_af, args.match_score, args.mismatch_penalty, args.gap_open_penalty, args.gap_ext_penalty, args.msa_binary) diff --git a/neusomatic/python/postprocess.py b/neusomatic/python/postprocess.py index 87834f2..d3d9a63 100755 --- a/neusomatic/python/postprocess.py +++ b/neusomatic/python/postprocess.py @@ -143,7 +143,7 @@ def add_vcf_info(work, reference, merged_vcf, candidates_vcf, ensemble_tsv, def postprocess(work, reference, pred_vcf_file, output_vcf, candidates_vcf, ensemble_tsv, tumor_bam, min_len, postprocess_max_dist, long_read, - lr_pad, lr_chunck_size, lr_chunck_scale, + lr_pad, lr_chunk_size, lr_chunk_scale, lr_snp_min_af, lr_ins_min_af, lr_del_min_af, lr_match_score, lr_mismatch_penalty, lr_gap_open_penalty, lr_gap_ext_penalty, pass_threshold, lowqual_threshold, @@ -183,7 +183,7 @@ def postprocess(work, reference, pred_vcf_file, output_vcf, candidates_vcf, ense work, "candidates_preds.ra_resolved.vcf") long_read_indelrealign(work_lr_indel_realign, tumor_bam, None, ra_resolved_vcf, target_bed, reference, num_threads,
lr_pad, - lr_chunck_size, lr_chunck_scale, lr_snp_min_af, + lr_chunk_size, lr_chunk_scale, lr_snp_min_af, lr_del_min_af, lr_ins_min_af, lr_match_score, lr_mismatch_penalty, lr_gap_open_penalty, lr_gap_ext_penalty, msa_binary) @@ -233,9 +233,9 @@ def postprocess(work, reference, pred_vcf_file, output_vcf, candidates_vcf, ense '--long_read', help='Enable long_read (high error-rate sequence) indel realignment', action="store_true") parser.add_argument( '--lr_pad', type=int, help='long_read indel realign: #base padding to the regions', default=1) - parser.add_argument('--lr_chunck_size', type=int, + parser.add_argument('--lr_chunk_size', type=int, help='long_read indel realign: chunk split size for high depth', default=600) - parser.add_argument('--lr_chunck_scale', type=float, + parser.add_argument('--lr_chunk_scale', type=float, help='long_read indel realign: chunk scale size for high depth', default=1.5) parser.add_argument('--lr_snp_min_af', type=float, help='long_read indel realign: SNP min allele freq', default=0.05) @@ -270,7 +270,7 @@ def postprocess(work, reference, pred_vcf_file, output_vcf, candidates_vcf, ense args.candidates_vcf, args.ensemble_tsv, args.tumor_bam, args.min_len, args.postprocess_max_dist, args.long_read, - args.lr_pad, args.lr_chunck_size, args.lr_chunck_scale, + args.lr_pad, args.lr_chunk_size, args.lr_chunk_scale, args.lr_snp_min_af, args.lr_ins_min_af, args.lr_del_min_af, args.lr_match_score, args.lr_mismatch_penalty, args.lr_gap_open_penalty, diff --git a/neusomatic/python/utils.py b/neusomatic/python/utils.py index b8eb237..c09e543 100755 --- a/neusomatic/python/utils.py +++ b/neusomatic/python/utils.py @@ -48,8 +48,8 @@ def concatenate_files(infiles, outfile, check_file_existence=True): def concatenate_vcfs(infiles, outfile, check_file_existence=True, header_string="#"): with open(outfile, "w") as out_fd: # Only keep files which exist - files_to_process = list(filter(lambda f: f and ( - not check_file_existence or os.path.isfile(f)), infiles)) + files_to_process = filter(lambda f: f and ( + not check_file_existence or os.path.isfile(f)), infiles) for index, infile in enumerate(files_to_process): with open(infile) as in_fd: From 7f57f89b749f8694199758271fd8ff6ff13d82a2 Mon Sep 17 00:00:00 2001 From: Sahraeian Date: Wed, 27 Feb 2019 16:25:00 -0800 Subject: [PATCH 4/7] updated version to 0.2.0 --- .gitignore | 1 - README.md | 13 ++++++----- docker/Dockerfile | 41 ++++++++++++++++++---------------- neusomatic/python/_version.py | 2 +- neusomatic/python/call.py | 4 ++-- test/NeuSomatic_ensemble.vcf | 2 +- test/NeuSomatic_standalone.vcf | 2 +- test/docker_test.sh | 18 +++++++-------- 8 files changed, 43 insertions(+), 40 deletions(-) diff --git a/.gitignore b/.gitignore index 8868922..903e584 100644 --- a/.gitignore +++ b/.gitignore @@ -3,7 +3,6 @@ third_party/* neusomatic/python/__pycache__/ neusomatic/build/ neusomatic/bin/* -neusomatic/python/__pycache__/ others/ neusomatic/python/*.pyc test/example/ diff --git a/README.md b/README.md index f0f62f6..2cbff6d 100644 --- a/README.md +++ b/README.md @@ -29,32 +29,33 @@ doi: https://doi.org/10.1101/393801](https://doi.org/10.1101/393801) ## Availability -NeuSomatic is written in Python and C++ and requires a Unix-like environment to run.
It has been sucessfully tested on CentOS 7. Its deep learning framework is implemented using PyTorch 1.0.0 to enable GPU acceleration for training/testing. +NeuSomatic is written in Python and C++ and requires a Unix-like environment to run. It has been successfully tested on CentOS 7. Its deep learning framework is implemented using PyTorch 1.0.1 to enable GPU acceleration for training/testing. NeuSomatic first scans the genome to identify candidate variants and extract alignment information. The binary for this step can be obtained at `neusomatic/bin` folder by running `./build.sh` (which requires cmake 3.13.2 and g++ 5.4.0). -Python 2.7 and the following Python packages must be installed: -* pytorch 1.0.0 +Python 3.7 and the following Python packages must be installed: +* pytorch 1.0.1 * torchvision 0.2.1 * pybedtools 0.8.0 * pysam 0.15.2 * zlib 1.2.11 * numpy 1.15.4 * scipy 1.2.0 +* imageio 2.5.0 * biopython 1.73 It also depends on the following packages: -* cuda80 1.0 (if you want to use GPU) +* cudatoolkit 8.0 (if you want to use GPU) * tabix 0.2.6 * bedtools 2.27.1 * samtools 1.9 You can install these packages using [anaconda](https://www.anaconda.com/download)/[miniconda](https://conda.io/miniconda.html) : ``` -conda install zlib=1.2.11 numpy=1.15.4 scipy=1.2.0 cmake=3.13.2 +conda install zlib=1.2.11 numpy=1.15.4 scipy=1.2.0 cmake=3.13.2 imageio=2.5.0 conda install pysam=0.15.2 pybedtools=0.8.0 samtools=1.9 tabix=0.2.6 bedtools=2.27.1 biopython=1.73 -c bioconda -conda install pytorch=1.0.0 torchvision=0.2.1 cuda80==1.0 -c pytorch +conda install pytorch=1.0.1 torchvision=0.2.1 cudatoolkit=8.0 -c pytorch ``` Then you can export the conda paths as: ``` diff --git a/docker/Dockerfile b/docker/Dockerfile index ff905d7..861e9e0 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -1,40 +1,43 @@ FROM ubuntu:16.04 -ENV NEUSOMATIC_VERSION 0.1.4 +ENV NEUSOMATIC_VERSION 0.2.0 ENV ZLIB_VERSION 1.2.11 -ENV NUMPY_VERSION 1.14.3 -ENV SCIPY_VERSION 1.1.0 -ENV PYTORCH_VERSION 0.3.1 -ENV TORCHVISION_VERSION 0.2.0 -ENV CMAKE_VERSION 3.12.1 -ENV PYBEDTOOLS_VERSION 0.7.10 -ENV PYSAM_VERSION 0.14.1 -ENV SAMTOOLS_VERSION 1.7 -ENV TABIX_VERSION 0.2.5 +ENV NUMPY_VERSION 1.15.4 +ENV SCIPY_VERSION 1.2.0 +ENV IMAGEIO_VERSION 2.5.0 +ENV PYTORCH_VERSION 1.0.1 +ENV TORCHVISION_VERSION 0.2.1 +ENV CUDATOOLKIT_VERSION=8.0 +ENV CMAKE_VERSION 3.13.2 +ENV PYBEDTOOLS_VERSION 0.8.0 +ENV PYSAM_VERSION 0.15.2 +ENV SAMTOOLS_VERSION 1.9 +ENV TABIX_VERSION 0.2.6 ENV BEDTOOLS_VERSION 2.27.1 -ENV BIOPYTHON_VERSION 1.68 +ENV BIOPYTHON_VERSION 1.73 ENV GCC_VERSION 5 RUN apt-get update && apt-get install -y --fix-missing \ build-essential zlib1g-dev curl less vim bzip2 RUN apt-get install -y --fix-missing git wget -RUN curl -LO http://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh -RUN bash Miniconda-latest-Linux-x86_64.sh -p /miniconda -b -RUN rm Miniconda-latest-Linux-x86_64.sh +RUN curl -LO http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh +RUN bash Miniconda3-latest-Linux-x86_64.sh -p /miniconda -b +RUN rm Miniconda3-latest-Linux-x86_64.sh ENV PATH=/miniconda/bin:${PATH} ENV LD_LIBRARY_PATH=/miniconda/lib:${LD_LIBRARY_PATH} RUN conda update -y conda -RUN conda install -y zlib=${ZLIB_VERSION} numpy=${NUMPY_VERSION} \ - scipy=${SCIPY_VERSION} && conda clean -a -RUN conda install -y pytorch=${PYTORCH_VERSION} \ - torchvision=${TORCHVISION_VERSION} -c pytorch && conda clean -a -RUN conda install -y cmake=${CMAKE_VERSION} -c conda-forge && conda clean -a + +RUN conda install -y zlib=${ZLIB_VERSION} numpy=${NUMPY_VERSION} scipy=${SCIPY_VERSION}\ + imageio=${IMAGEIO_VERSION} cmake=${CMAKE_VERSION} && conda clean -a RUN conda install -y pysam=${PYSAM_VERSION} pybedtools=${PYBEDTOOLS_VERSION} \ samtools=${SAMTOOLS_VERSION}
tabix=${TABIX_VERSION} \ bedtools=${BEDTOOLS_VERSION} \ biopython=${BIOPYTHON_VERSION} -c bioconda && conda clean -a +RUN conda install -y pytorch=${PYTORCH_VERSION} \ + torchvision=${TORCHVISION_VERSION} \ + cudatoolkit=${CUDATOOLKIT_VERSION}-c pytorch && conda clean -a RUN apt-get install -y --fix-missing gcc-${GCC_VERSION} g++-${GCC_VERSION} diff --git a/neusomatic/python/_version.py b/neusomatic/python/_version.py index bbab024..d3ec452 100755 --- a/neusomatic/python/_version.py +++ b/neusomatic/python/_version.py @@ -1 +1 @@ -__version__ = "0.1.4" +__version__ = "0.2.0" diff --git a/neusomatic/python/call.py b/neusomatic/python/call.py index da375d2..2df7aba 100755 --- a/neusomatic/python/call.py +++ b/neusomatic/python/call.py @@ -14,7 +14,7 @@ import pysam import numpy as np -from scipy.misc import imsave, imread +from imageio import imwrite, imread import torch from torch.autograd import Variable import torch.nn as nn @@ -88,7 +88,7 @@ def call_variants(net, vartype_classes, call_loader, out_dir, model_tag, use_cud file_name = "{}/matrices_{}/{}.png".format( out_dir, model_tag, path) if not os.path.exists(file_name): - imsave(file_name, non_transformed_matrices[i, :, :, 0:3]) + imwrite(file_name, non_transformed_matrices[i, :, :, 0:3]) true_path[path] = file_name final_preds[path] = [vartype_classes[predicted[i]], pos_pred[i], len_pred[i], list(map(lambda x: round(x, 4), F.softmax( diff --git a/test/NeuSomatic_ensemble.vcf b/test/NeuSomatic_ensemble.vcf index 92b2aca..e037be6 100644 --- a/test/NeuSomatic_ensemble.vcf +++ b/test/NeuSomatic_ensemble.vcf @@ -1,5 +1,5 @@ ##fileformat=VCFv4.2 -##NeuSomatic Version=0.1.4 +##NeuSomatic Version=0.2.0 ##FORMAT= ##FILTER= ##FILTER= diff --git a/test/NeuSomatic_standalone.vcf b/test/NeuSomatic_standalone.vcf index 6bee259..47bf078 100644 --- a/test/NeuSomatic_standalone.vcf +++ b/test/NeuSomatic_standalone.vcf @@ -1,5 +1,5 @@ ##fileformat=VCFv4.2 -##NeuSomatic Version=0.1.4 +##NeuSomatic Version=0.2.0 ##FORMAT= ##FILTER= ##FILTER= diff --git a/test/docker_test.sh b/test/docker_test.sh index b19ba53..a3fafba 100755 --- a/test/docker_test.sh +++ b/test/docker_test.sh @@ -10,16 +10,16 @@ if [ ! -f Homo_sapiens.GRCh37.75.dna.chromosome.22.fa ] then if [ ! -f Homo_sapiens.GRCh37.75.dna.chromosome.22.fa.gz ] then - docker run -v ${test_dir}:/mnt -u $UID --memory 30G msahraeian/neusomatic:0.1.4 /bin/bash -c \ + docker run -v ${test_dir}:/mnt -u $UID --memory 30G msahraeian/neusomatic:0.2.0 /bin/bash -c \ "cd /mnt/example/ && wget ftp://ftp.ensembl.org/pub/release-75//fasta/homo_sapiens/dna/Homo_sapiens.GRCh37.75.dna.chromosome.22.fa.gz" fi - docker run -v ${test_dir}:/mnt -u $UID --memory 30G msahraeian/neusomatic:0.1.4 /bin/bash -c \ + docker run -v ${test_dir}:/mnt -u $UID --memory 30G msahraeian/neusomatic:0.2.0 /bin/bash -c \ "cd /mnt/example/ && gunzip -f Homo_sapiens.GRCh37.75.dna.chromosome.22.fa.gz" fi if [ ! 
-f Homo_sapiens.GRCh37.75.dna.chromosome.22.fa.fai ] then - docker run -v ${test_dir}:/mnt -u $UID --memory 30G msahraeian/neusomatic:0.1.4 /bin/bash -c \ + docker run -v ${test_dir}:/mnt -u $UID --memory 30G msahraeian/neusomatic:0.2.0 /bin/bash -c \ "samtools faidx /mnt/example/Homo_sapiens.GRCh37.75.dna.chromosome.22.fa" fi rm -rf work_standalone @@ -27,7 +27,7 @@ rm -rf work_standalone #Stand-alone NeuSomatic test -docker run -v ${test_dir}:/mnt -u $UID --memory 30G msahraeian/neusomatic:0.1.4 /bin/bash -c \ +docker run -v ${test_dir}:/mnt -u $UID --memory 30G msahraeian/neusomatic:0.2.0 /bin/bash -c \ "python /opt/neusomatic/neusomatic/python/preprocess.py \ --mode call \ --reference /mnt/example/Homo_sapiens.GRCh37.75.dna.chromosome.22.fa \ @@ -45,7 +45,7 @@ docker run -v ${test_dir}:/mnt -u $UID --memory 30G msahraeian/neusomatic:0.1.4 --num_threads 1 \ --scan_alignments_binary /opt/neusomatic/neusomatic/bin/scan_alignments" -docker run -v ${test_dir}:/mnt -u $UID --memory 30G --shm-size 8G msahraeian/neusomatic:0.1.4 /bin/bash -c \ +docker run -v ${test_dir}:/mnt -u $UID --memory 30G --shm-size 8G msahraeian/neusomatic:0.2.0 /bin/bash -c \ "CUDA_VISIBLE_DEVICES= python /opt/neusomatic/neusomatic/python/call.py \ --candidates_tsv /mnt/example/work_standalone/dataset/*/candidates*.tsv \ --reference /mnt/example/Homo_sapiens.GRCh37.75.dna.chromosome.22.fa \ @@ -54,7 +54,7 @@ docker run -v ${test_dir}:/mnt -u $UID --memory 30G --shm-size 8G msahraeian/neu --num_threads 1 \ --batch_size 100" -docker run -v ${test_dir}:/mnt -u $UID --memory 30G msahraeian/neusomatic:0.1.4 /bin/bash -c \ +docker run -v ${test_dir}:/mnt -u $UID --memory 30G msahraeian/neusomatic:0.2.0 /bin/bash -c \ "python /opt/neusomatic/neusomatic/python/postprocess.py \ --reference /mnt/example/Homo_sapiens.GRCh37.75.dna.chromosome.22.fa \ --tumor_bam /mnt/tumor.bam \ @@ -66,7 +66,7 @@ docker run -v ${test_dir}:/mnt -u $UID --memory 30G msahraeian/neusomatic:0.1.4 rm -rf /mnt/example/work_ensemble #Ensemble NeuSomatic test -docker run -v ${test_dir}:/mnt -u $UID --memory 30G msahraeian/neusomatic:0.1.4 /bin/bash -c \ +docker run -v ${test_dir}:/mnt -u $UID --memory 30G msahraeian/neusomatic:0.2.0 /bin/bash -c \ "python /opt/neusomatic/neusomatic/python/preprocess.py \ --mode call \ --reference /mnt/example/Homo_sapiens.GRCh37.75.dna.chromosome.22.fa \ @@ -85,7 +85,7 @@ docker run -v ${test_dir}:/mnt -u $UID --memory 30G msahraeian/neusomatic:0.1.4 --ensemble_tsv /mnt/ensemble.tsv \ --scan_alignments_binary /opt/neusomatic/neusomatic/bin/scan_alignments" -docker run -v ${test_dir}:/mnt -u $UID --memory 30G --shm-size 8G msahraeian/neusomatic:0.1.4 /bin/bash -c \ +docker run -v ${test_dir}:/mnt -u $UID --memory 30G --shm-size 8G msahraeian/neusomatic:0.2.0 /bin/bash -c \ "CUDA_VISIBLE_DEVICES= python /opt/neusomatic/neusomatic/python/call.py \ --candidates_tsv /mnt/example/work_ensemble/dataset/*/candidates*.tsv \ --reference /mnt/example/Homo_sapiens.GRCh37.75.dna.chromosome.22.fa \ @@ -95,7 +95,7 @@ docker run -v ${test_dir}:/mnt -u $UID --memory 30G --shm-size 8G msahraeian/neu --ensemble \ --batch_size 100" -docker run -v ${test_dir}:/mnt -u $UID --memory 30G msahraeian/neusomatic:0.1.4 /bin/bash -c \ +docker run -v ${test_dir}:/mnt -u $UID --memory 30G msahraeian/neusomatic:0.2.0 /bin/bash -c \ "python /opt/neusomatic/neusomatic/python/postprocess.py \ --reference /mnt/example/Homo_sapiens.GRCh37.75.dna.chromosome.22.fa \ --tumor_bam /mnt/tumor.bam \ From 46e772277284aab677d54dc3968862b4232053ab Mon Sep 17 00:00:00 
2001 From: Sahraeian Date: Wed, 27 Feb 2019 23:35:32 -0800 Subject: [PATCH 5/7] fixed docker file --- docker/Dockerfile | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index 861e9e0..ee811b0 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -7,14 +7,14 @@ ENV SCIPY_VERSION 1.2.0 ENV IMAGEIO_VERSION 2.5.0 ENV PYTORCH_VERSION 1.0.1 ENV TORCHVISION_VERSION 0.2.1 -ENV CUDATOOLKIT_VERSION=8.0 +ENV CUDATOOLKIT_VERSION 8.0 ENV CMAKE_VERSION 3.13.2 ENV PYBEDTOOLS_VERSION 0.8.0 ENV PYSAM_VERSION 0.15.2 ENV SAMTOOLS_VERSION 1.9 ENV TABIX_VERSION 0.2.6 ENV BEDTOOLS_VERSION 2.27.1 -ENV BIOPYTHON_VERSION 1.73 +ENV BIOPYTHON_VERSION 1.72 ENV GCC_VERSION 5 RUN apt-get update && apt-get install -y --fix-missing \ @@ -29,15 +29,16 @@ ENV LD_LIBRARY_PATH=/miniconda/lib:${LD_LIBRARY_PATH} RUN conda update -y conda -RUN conda install -y zlib=${ZLIB_VERSION} numpy=${NUMPY_VERSION} scipy=${SCIPY_VERSION}\ - imageio=${IMAGEIO_VERSION} cmake=${CMAKE_VERSION} && conda clean -a +RUN conda install -y zlib=${ZLIB_VERSION} numpy=${NUMPY_VERSION} scipy=${SCIPY_VERSION} \ + imageio=${IMAGEIO_VERSION} && conda clean -a +RUN conda install -y cmake=${CMAKE_VERSION} -c conda-forge && conda clean -a RUN conda install -y pysam=${PYSAM_VERSION} pybedtools=${PYBEDTOOLS_VERSION} \ samtools=${SAMTOOLS_VERSION} tabix=${TABIX_VERSION} \ bedtools=${BEDTOOLS_VERSION} \ biopython=${BIOPYTHON_VERSION} -c bioconda && conda clean -a RUN conda install -y pytorch=${PYTORCH_VERSION} \ torchvision=${TORCHVISION_VERSION} \ - cudatoolkit=${CUDATOOLKIT_VERSION}-c pytorch && conda clean -a + cudatoolkit=${CUDATOOLKIT_VERSION} -c pytorch && conda clean -a RUN apt-get install -y --fix-missing gcc-${GCC_VERSION} g++-${GCC_VERSION} From e77c242142fca4c14eb0e5570e246594716a8b14 Mon Sep 17 00:00:00 2001 From: Sahraeian Date: Thu, 28 Feb 2019 11:20:33 -0800 Subject: [PATCH 6/7] some logging added --- neusomatic/python/call.py | 4 ++++ neusomatic/python/train.py | 3 +++ 2 files changed, 7 insertions(+) diff --git a/neusomatic/python/call.py b/neusomatic/python/call.py index 2680109..04f9d2b 100755 --- a/neusomatic/python/call.py +++ b/neusomatic/python/call.py @@ -404,7 +404,11 @@ def call_neusomatic(candidates_tsv, ref_file, out_dir, checkpoint, num_threads, num_channels = 119 if ensemble else 26 net = NeuSomaticNet(num_channels) if use_cuda: + logger.info("GPU calling!") net.cuda() + else: + logger.info("CPU calling!") + if torch.cuda.device_count() > 1: logger.info("We use {} GPUs!".format(torch.cuda.device_count())) diff --git a/neusomatic/python/train.py b/neusomatic/python/train.py index 7e4903e..8dbb86e 100755 --- a/neusomatic/python/train.py +++ b/neusomatic/python/train.py @@ -204,7 +204,10 @@ def train_neusomatic(candidates_tsv, validation_candidates_tsv, out_dir, checkpo num_channels = 119 if ensemble else 26 net = NeuSomaticNet(num_channels) if use_cuda: + logger.info("GPU training!") net.cuda() + else: + logger.info("CPU training!") if torch.cuda.device_count() > 1: logger.info("We use {} GPUs!".format(torch.cuda.device_count())) From df51e9d25b2644c85e3afc15e3291afaa50a1999 Mon Sep 17 00:00:00 2001 From: Sahraeian Date: Thu, 28 Feb 2019 16:24:30 -0800 Subject: [PATCH 7/7] pytorch version logging added --- neusomatic/python/call.py | 3 +++ neusomatic/python/train.py | 3 +++ 2 files changed, 6 insertions(+) diff --git a/neusomatic/python/call.py b/neusomatic/python/call.py index 04f9d2b..0f13cd1 100755 --- a/neusomatic/python/call.py +++ b/neusomatic/python/call.py
@@ -20,6 +20,7 @@ import torch.nn as nn import torch.nn.functional as F from torchvision import transforms +import torchvision from network import NeuSomaticNet from dataloader import NeuSomaticDataset @@ -391,6 +392,8 @@ def call_neusomatic(candidates_tsv, ref_file, out_dir, checkpoint, num_threads, logger.info("-----------------Call Somatic Mutations--------------------") + logger.info("PyTorch Version: {}".format(torch.__version__)) + logger.info("Torchvision Version: {}".format(torchvision.__version__)) if not use_cuda: torch.set_num_threads(num_threads) diff --git a/neusomatic/python/train.py b/neusomatic/python/train.py index 8dbb86e..a8620d3 100755 --- a/neusomatic/python/train.py +++ b/neusomatic/python/train.py @@ -16,6 +16,7 @@ import torch.nn.functional as F import torch.optim as optim from torchvision import transforms +import torchvision from random import shuffle import pickle @@ -191,6 +192,8 @@ def train_neusomatic(candidates_tsv, validation_candidates_tsv, out_dir, checkpo logger = logging.getLogger(train_neusomatic.__name__) logger.info("----------------Train NeuSomatic Network-------------------") + logger.info("PyTorch Version: {}".format(torch.__version__)) + logger.info("Torchvision Version: {}".format(torchvision.__version__)) if not os.path.exists(out_dir): os.mkdir(out_dir)
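The hunks in patches 2 and 3 repeat a small set of mechanical Python 2-to-3 patterns: wrapping the now-lazy `map`/`filter`/`zip`/`dict.items()` in `list()` wherever the result is indexed or reused, switching `/` to `//` where floor division is intended, unpacking pooled worker arguments inside the function body (PEP 3113 removed tuple parameters, hence the `input_record` rewrites of `run_realignment`, `correct_bam_chrom`, and `run_scan_alignments`), and opening pickle index files in binary mode. The sketch below is a minimal, self-contained illustration of those patterns; every name in it (`worker`, `counts`, `example.idx`) is invented for the example and is not NeuSomatic's API.

```python
import multiprocessing
import pickle


def worker(input_record):
    # PEP 3113 removed tuple parameters, so "def worker((a, b)):" becomes
    # explicit unpacking inside the function body.
    name, chunk_size = input_record
    return len(name) // chunk_size  # "//" keeps Python 2's floor division


def main():
    counts = {"DEL": 2, "INS": 1, "SNP": 4}
    # items()/keys()/map()/filter()/zip() now return views or one-shot
    # iterators, so wrap them in list() wherever the result is indexed,
    # reused, or len()-ed -- the bulk of the hunks in patch 2.
    pairs = list(counts.items())
    frequent = list(filter(lambda kv: kv[1] > 1, pairs))
    first_key = list(counts.keys())[0]

    # pickle index files must be opened in binary mode under Python 3,
    # matching the "rb"/"wb" changes in merge_tsvs.py and train.py.
    with open("example.idx", "wb") as fd:
        pickle.dump(frequent, fd)
    with open("example.idx", "rb") as fd:
        restored = pickle.load(fd)

    with multiprocessing.Pool(2) as pool:
        results = pool.map(worker, [("tumor", 2), ("normal", 3)])
    print(first_key, restored, results)


if __name__ == "__main__":
    main()
```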
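Several hunks above also append `.saveas()` to `pybedtools.BedTool` objects built from generators or `.filter()` chains (`find_realign_dict`, `find_resolved_variants`, `scan_alignments`, `split_bed`). Under Python 3, `map()` returns an iterator, so a BedTool wrapped around it is a one-shot stream; `.saveas()` materializes it to a temp file so later `.sort()`, `.merge()`, or a second iteration still see every interval. A small sketch of that idiom, assuming pybedtools 0.8.0 with a bedtools binary on the PATH:

```python
import pybedtools

# A BedTool built from a generator (or from .filter()) is consumed lazily;
# .saveas() writes it to a temp file so it can be iterated more than once.
intervals = (pybedtools.Interval("22", start, start + 50)
             for start in range(21330000, 21330500, 100))
bed = pybedtools.BedTool(intervals).saveas()

total_span = sum(len(iv) for iv in bed)  # first pass over the intervals
merged = bed.sort().merge()              # second pass still sees all records
print(total_span, merged.count())
```

Without the `.saveas()`, the second pass would operate on an exhausted iterator and silently produce an empty result, which is exactly the failure mode these hunks guard against.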
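Finally, the expected-output VCFs in `test/` change only in the low decimal places of QUAL (e.g. 40.0000 to 39.9993), consistent with tiny numerical differences from the framework upgrade rather than any behavioral change. The shifts line up with QUAL being a Phred-scaled transform of SCORE; this relationship is inferred from the rows above, not quoted from NeuSomatic's code, so treat the check below as a hypothesis (small mismatches are expected because the printed SCOREs are rounded to four decimals).

```python
import math

def approx_qual(score):
    # Hypothesized Phred scaling: QUAL ~= -10 * log10(1 - SCORE).
    return -10.0 * math.log10(1.0 - score)

# (SCORE, QUAL) pairs taken from the test VCF rows above.
for score, qual in [(0.9980, 26.9905), (0.9999, 39.9993), (0.9940, 22.2248)]:
    print(score, qual, round(approx_qual(score), 4))
```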