diff --git a/README.md b/README.md
index cebc9f6..b9201db 100755
--- a/README.md
+++ b/README.md
@@ -41,7 +41,7 @@ conda install -f -c conda-forge -c bioconda wisecondorx
### Running WisecondorX
There are three main stages (converting, reference creating and predicting) when using WisecondorX:
-- Convert .bam to .npz files (for both reference and test samples)
+- Convert aligned reads to .npz files (for both reference and test samples)
- Create a reference (using reference .npz files)
- **Important notes**
- Automated gender prediction, required to consistently analyze sex chromosomes, is based on a Gaussian mixture
@@ -56,11 +56,11 @@ There are three main stages (converting, reference creating and predicting) when
observe additional improvement concerning normalization.
- Predict copy number alterations (using the reference file and test .npz cases of interest)
-### Stage (1) Convert .bam to .npz
+### Stage (1) Convert aligned reads (sam/bam/cram) to .npz
```bash
-WisecondorX convert input.bam output.npz [--optional arguments]
+WisecondorX convert input.sam/bam/cram output.npz [--optional arguments]
```
Optional argument
| Function
diff --git a/wisecondorX/convert_tools.py b/wisecondorX/convert_tools.py
index ce5488e..41966d6 100644
--- a/wisecondorX/convert_tools.py
+++ b/wisecondorX/convert_tools.py
@@ -4,21 +4,31 @@
import numpy as np
import pysam
+import sys
'''
-Converts bam file to numpy array by transforming
+Converts aligned reads file to numpy array by transforming
individual reads to counts per bin.
'''
-def convert_bam(args):
+def convert_reads(args):
bins_per_chr = dict()
for chr in range(1, 25):
bins_per_chr[str(chr)] = None
logging.info('Importing data ...')
- bam_file = pysam.AlignmentFile(args.infile, 'rb')
+ if args.infile.endswith(".sam"):
+ reads_file = pysam.AlignmentFile(args.infile, 'r')
+ elif args.infile.endswith(".bam"):
+ reads_file = pysam.AlignmentFile(args.infile, 'rb')
+ elif args.infile.endswith(".cram"):
+ reads_file = pysam.AlignmentFile(args.infile, 'rc')
+ else:
+ logging.error(
+ "Unsupported input file type. Make sure your input filename has a correct extension (sam/bam/cram)")
+ sys.exit(1)
reads_seen = 0
reads_kept = 0
@@ -28,9 +38,9 @@ def convert_bam(args):
larp = -1
larp2 = -1
- logging.info('Converting bam ... This might take a while ...')
+ logging.info('Converting aligned reads ... This might take a while ...')
- for index, chr in enumerate(bam_file.references):
+ for index, chr in enumerate(reads_file.references):
chr_name = chr
if chr_name[:3].lower() == 'chr':
@@ -39,9 +49,9 @@ def convert_bam(args):
continue
logging.info('Working at {}; processing {} bins'
- .format(chr, int(bam_file.lengths[index] / float(args.binsize) + 1)))
- counts = np.zeros(int(bam_file.lengths[index] / float(args.binsize) + 1), dtype=np.int32)
- bam_chr = bam_file.fetch(chr)
+ .format(chr, int(reads_file.lengths[index] / float(args.binsize) + 1)))
+ counts = np.zeros(int(reads_file.lengths[index] / float(args.binsize) + 1), dtype=np.int32)
+ bam_chr = reads_file.fetch(chr)
if chr_name == 'X':
chr_name = '23'
@@ -81,9 +91,9 @@ def convert_bam(args):
bins_per_chr[chr_name] = counts
reads_kept += sum(counts)
- qual_info = {'mapped': bam_file.mapped,
- 'unmapped': bam_file.unmapped,
- 'no_coordinate': bam_file.nocoordinate,
+ qual_info = {'mapped': reads_file.mapped,
+ 'unmapped': reads_file.unmapped,
+ 'no_coordinate': reads_file.nocoordinate,
'filter_rmdup': reads_rmdup,
'filter_mapq': reads_mapq,
'pre_retro': reads_seen,
diff --git a/wisecondorX/main.py b/wisecondorX/main.py
index 49888d6..01183a0 100755
--- a/wisecondorX/main.py
+++ b/wisecondorX/main.py
@@ -8,7 +8,7 @@
import numpy as np
-from wisecondorX.convert_tools import convert_bam
+from wisecondorX.convert_tools import convert_reads
from wisecondorX.newref_control import tool_newref_prep, tool_newref_main, tool_newref_merge
from wisecondorX.newref_tools import train_gender_model, get_mask
from wisecondorX.overall_tools import gender_correct, scale_sample
@@ -20,7 +20,7 @@
def tool_convert(args):
logging.info('Starting conversion')
- sample, qual_info = convert_bam(args)
+ sample, qual_info = convert_reads(args)
np.savez_compressed(args.outfile,
binsize=args.binsize,
sample=sample,