Skip to content

Commit

Permalink
Merge pull request #100 from CenterForMedicalGeneticsGhent/chore/prettier
Browse files Browse the repository at this point in the history

lint code with black
  • Loading branch information
matthdsm authored Feb 3, 2023
2 parents dc53a99 + a870763 commit 1667619
Show file tree
Hide file tree
Showing 11 changed files with 985 additions and 757 deletions.
4 changes: 2 additions & 2 deletions LICENSE.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Copyright (C) 2016 VU University Medical Center Amsterdam
Author: Roy Straver (github.com/rstraver)
Mod: Lennart Raman (github.com/leraman)
Mod: Lennart Raman (github.com/leraman)

WISECONDOR is distributed under the following license:
[Attribution-NonCommercial-ShareAlike CC BY-NC-SA]( https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode)
[Attribution-NonCommercial-ShareAlike CC BY-NC-SA](https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode)
This license is governed by Dutch law and this license is subject to the exclusive jurisdiction of the courts of the Netherlands.
189 changes: 96 additions & 93 deletions README.md

Large diffs are not rendered by default.

73 changes: 40 additions & 33 deletions setup.py
Original file line number Diff line number Diff line change
@@ -1,48 +1,55 @@
#! /usr/bin/env python
from setuptools import setup, find_packages

version = '1.2.4'
dl_version = 'master' if 'dev' in version else '{}'.format(version)
version = "1.2.4"
dl_version = "master" if "dev" in version else "{}".format(version)

setup(
name='WisecondorX',
name="WisecondorX",
version=version,
author='Matthias De Smet, Lennart Raman',
author_email='[email protected]',
author="Matthias De Smet, Lennart Raman",
author_email="[email protected]",
description="WisecondorX -- an evolved WISECONDOR",
long_description=__doc__,
keywords=['bioinformatics', 'biology', 'sequencing', 'NGS', 'next generation sequencing',
'CNV', 'SWGS', 'Shallow Whole Genome Sequencing'],
download_url='https://github.com/CenterForMedicalGeneticsGhent/WisecondorX/archive/v{}.tar.gz'.format(
dl_version),
license='Attribution-NonCommercial-ShareAlike CC BY-NC-SA',
packages=find_packages('.'),
python_requires='>=2.7',
keywords=[
"bioinformatics",
"biology",
"sequencing",
"NGS",
"next generation sequencing",
"CNV",
"SWGS",
"Shallow Whole Genome Sequencing",
],
download_url="https://github.com/CenterForMedicalGeneticsGhent/WisecondorX/archive/v{}.tar.gz".format(
dl_version
),
license="Attribution-NonCommercial-ShareAlike CC BY-NC-SA",
packages=find_packages("."),
python_requires=">=2.7",
include_package_data=True,
zip_safe=False,
install_requires=[
'futures;python_version<"3"',
'scipy',
'scikit-learn',
'pysam',
'numpy'
"scipy",
"scikit-learn",
"pysam",
"numpy",
],
entry_points={
'console_scripts': ['WisecondorX = wisecondorX.main:main']
},
entry_points={"console_scripts": ["WisecondorX = wisecondorX.main:main"]},
classifiers=[
'Development Status :: 3 - Alpha',
'Environment :: Console',
'Intended Audience :: Science/Research',
'Natural Language :: English',
'Operating System :: MacOS :: MacOS X',
'Operating System :: POSIX',
'Operating System :: Unix',
'Programming Language :: Python :: 2',
'Programming Language :: Python :: 2.7',
'Programming Language :: Python :: 3',
'Programming Language :: Python :: 3.6',
'Topic :: Scientific/Engineering',
'Topic :: Scientific/Engineering :: Bio-Informatics'
]
"Development Status :: 3 - Alpha",
"Environment :: Console",
"Intended Audience :: Science/Research",
"Natural Language :: English",
"Operating System :: MacOS :: MacOS X",
"Operating System :: POSIX",
"Operating System :: Unix",
"Programming Language :: Python :: 2",
"Programming Language :: Python :: 2.7",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.6",
"Topic :: Scientific/Engineering",
"Topic :: Scientific/Engineering :: Bio-Informatics",
],
)
68 changes: 42 additions & 26 deletions wisecondorX/convert_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,30 +6,35 @@
import pysam
import sys

'''
"""
Converts aligned reads file to numpy array by transforming
individual reads to counts per bin.
'''
"""


def convert_reads(args):
bins_per_chr = dict()
for chr in range(1, 25):
bins_per_chr[str(chr)] = None

logging.info('Importing data ...')
logging.info("Importing data ...")

if args.infile.endswith(".bam"):
reads_file = pysam.AlignmentFile(args.infile, 'rb')
reads_file = pysam.AlignmentFile(args.infile, "rb")
elif args.infile.endswith(".cram"):
if args.reference is not None:
reads_file = pysam.AlignmentFile(args.infile, 'rc', reference_filename=args.reference)
reads_file = pysam.AlignmentFile(
args.infile, "rc", reference_filename=args.reference
)
else:
logging.error("Cram support requires a reference file, please use the --reference argument")
logging.error(
"Cram support requires a reference file, please use the --reference argument"
)
sys.exit(1)
else:
logging.error(
"Unsupported input file type. Make sure your input filename has a correct extension ( bam or cram)")
"Unsupported input file type. Make sure your input filename has a correct extension ( bam or cram)"
)
sys.exit(1)

reads_seen = 0
Expand All @@ -40,32 +45,41 @@ def convert_reads(args):
larp = -1
larp2 = -1

logging.info('Converting aligned reads ... This might take a while ...')
logging.info("Converting aligned reads ... This might take a while ...")

for index, chr in enumerate(reads_file.references):

chr_name = chr
if chr_name[:3].lower() == 'chr':
if chr_name[:3].lower() == "chr":
chr_name = chr_name[3:]
if chr_name not in bins_per_chr and chr_name != 'X' and chr_name != 'Y':
if chr_name not in bins_per_chr and chr_name != "X" and chr_name != "Y":
continue

logging.info('Working at {}; processing {} bins'
.format(chr, int(reads_file.lengths[index] / float(args.binsize) + 1)))
counts = np.zeros(int(reads_file.lengths[index] / float(args.binsize) + 1), dtype=np.int32)
logging.info(
"Working at {}; processing {} bins".format(
chr, int(reads_file.lengths[index] / float(args.binsize) + 1)
)
)
counts = np.zeros(
int(reads_file.lengths[index] / float(args.binsize) + 1), dtype=np.int32
)
bam_chr = reads_file.fetch(chr)

if chr_name == 'X':
chr_name = '23'
if chr_name == 'Y':
chr_name = '24'
if chr_name == "X":
chr_name = "23"
if chr_name == "Y":
chr_name = "24"

for read in bam_chr:
if read.is_paired:
if not read.is_proper_pair:
reads_pairf += 1
continue
if not args.normdup and larp == read.pos and larp2 == read.next_reference_start:
if (
not args.normdup
and larp == read.pos
and larp2 == read.next_reference_start
):
reads_rmdup += 1
else:
if read.mapping_quality >= 1:
Expand Down Expand Up @@ -93,12 +107,14 @@ def convert_reads(args):
bins_per_chr[chr_name] = counts
reads_kept += sum(counts)

qual_info = {'mapped': reads_file.mapped,
'unmapped': reads_file.unmapped,
'no_coordinate': reads_file.nocoordinate,
'filter_rmdup': reads_rmdup,
'filter_mapq': reads_mapq,
'pre_retro': reads_seen,
'post_retro': reads_kept,
'pair_fail': reads_pairf}
qual_info = {
"mapped": reads_file.mapped,
"unmapped": reads_file.unmapped,
"no_coordinate": reads_file.nocoordinate,
"filter_rmdup": reads_rmdup,
"filter_mapq": reads_mapq,
"pre_retro": reads_seen,
"post_retro": reads_kept,
"pair_fail": reads_pairf,
}
return bins_per_chr, qual_info
Loading

0 comments on commit 1667619

Please sign in to comment.