Merge pull request #100 from CenterForMedicalGeneticsGhent/chore/prettier

lint code with black
CenterForMedicalGeneticsGhent · Feb 3, 2023 · 1667619 · 1667619
2 parents dc53a99 + a870763
commit 1667619
Show file tree

Hide file tree

Showing 11 changed files with 985 additions and 757 deletions.
diff --git a/LICENSE.md b/LICENSE.md
@@ -1,7 +1,7 @@
 Copyright (C) 2016 VU University Medical Center Amsterdam  
 Author: Roy Straver (github.com/rstraver)  
-Mod: Lennart Raman (github.com/leraman)  
+Mod: Lennart Raman (github.com/leraman)
 
 WISECONDOR is distributed under the following license:  
-[Attribution-NonCommercial-ShareAlike CC BY-NC-SA]( https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode)  
+[Attribution-NonCommercial-ShareAlike CC BY-NC-SA](https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode)  
 This license is governed by Dutch law and this license is subject to the exclusive jurisdiction of the courts of the Netherlands.
diff --git a/README.md b/README.md
diff --git a/setup.py b/setup.py
@@ -1,48 +1,55 @@
 #! /usr/bin/env python
 from setuptools import setup, find_packages
 
-version = '1.2.4'
-dl_version = 'master' if 'dev' in version else '{}'.format(version)
+version = "1.2.4"
+dl_version = "master" if "dev" in version else "{}".format(version)
 
 setup(
-    name='WisecondorX',
+    name="WisecondorX",
     version=version,
-    author='Matthias De Smet, Lennart Raman',
-    author_email='[email protected]',
+    author="Matthias De Smet, Lennart Raman",
+    author_email="[email protected]",
     description="WisecondorX -- an evolved WISECONDOR",
     long_description=__doc__,
-    keywords=['bioinformatics', 'biology', 'sequencing', 'NGS', 'next generation sequencing',
-              'CNV', 'SWGS', 'Shallow Whole Genome Sequencing'],
-    download_url='https://github.com/CenterForMedicalGeneticsGhent/WisecondorX/archive/v{}.tar.gz'.format(
-        dl_version),
-    license='Attribution-NonCommercial-ShareAlike CC BY-NC-SA',
-    packages=find_packages('.'),
-    python_requires='>=2.7',
+    keywords=[
+        "bioinformatics",
+        "biology",
+        "sequencing",
+        "NGS",
+        "next generation sequencing",
+        "CNV",
+        "SWGS",
+        "Shallow Whole Genome Sequencing",
+    ],
+    download_url="https://github.com/CenterForMedicalGeneticsGhent/WisecondorX/archive/v{}.tar.gz".format(
+        dl_version
+    ),
+    license="Attribution-NonCommercial-ShareAlike CC BY-NC-SA",
+    packages=find_packages("."),
+    python_requires=">=2.7",
     include_package_data=True,
     zip_safe=False,
     install_requires=[
         'futures;python_version<"3"',
-        'scipy',
-        'scikit-learn',
-        'pysam',
-        'numpy'
+        "scipy",
+        "scikit-learn",
+        "pysam",
+        "numpy",
     ],
-    entry_points={
-        'console_scripts': ['WisecondorX = wisecondorX.main:main']
-    },
+    entry_points={"console_scripts": ["WisecondorX = wisecondorX.main:main"]},
     classifiers=[
-        'Development Status :: 3 - Alpha',
-        'Environment :: Console',
-        'Intended Audience :: Science/Research',
-        'Natural Language :: English',
-        'Operating System :: MacOS :: MacOS X',
-        'Operating System :: POSIX',
-        'Operating System :: Unix',
-        'Programming Language :: Python :: 2',
-        'Programming Language :: Python :: 2.7',
-        'Programming Language :: Python :: 3',
-        'Programming Language :: Python :: 3.6',
-        'Topic :: Scientific/Engineering',
-        'Topic :: Scientific/Engineering :: Bio-Informatics'
-    ]
+        "Development Status :: 3 - Alpha",
+        "Environment :: Console",
+        "Intended Audience :: Science/Research",
+        "Natural Language :: English",
+        "Operating System :: MacOS :: MacOS X",
+        "Operating System :: POSIX",
+        "Operating System :: Unix",
+        "Programming Language :: Python :: 2",
+        "Programming Language :: Python :: 2.7",
+        "Programming Language :: Python :: 3",
+        "Programming Language :: Python :: 3.6",
+        "Topic :: Scientific/Engineering",
+        "Topic :: Scientific/Engineering :: Bio-Informatics",
+    ],
 )
diff --git a/wisecondorX/convert_tools.py b/wisecondorX/convert_tools.py
@@ -6,30 +6,35 @@
 import pysam
 import sys
 
-'''
+"""
 Converts aligned reads file to numpy array by transforming
 individual reads to counts per bin.
-'''
+"""
 
 
 def convert_reads(args):
     bins_per_chr = dict()
     for chr in range(1, 25):
         bins_per_chr[str(chr)] = None
 
-    logging.info('Importing data ...')
+    logging.info("Importing data ...")
 
     if args.infile.endswith(".bam"):
-        reads_file = pysam.AlignmentFile(args.infile, 'rb')
+        reads_file = pysam.AlignmentFile(args.infile, "rb")
     elif args.infile.endswith(".cram"):
         if args.reference is not None:
-            reads_file = pysam.AlignmentFile(args.infile, 'rc', reference_filename=args.reference)
+            reads_file = pysam.AlignmentFile(
+                args.infile, "rc", reference_filename=args.reference
+            )
         else:
-            logging.error("Cram support requires a reference file, please use the --reference argument")
+            logging.error(
+                "Cram support requires a reference file, please use the --reference argument"
+            )
             sys.exit(1)
     else:
         logging.error(
-            "Unsupported input file type. Make sure your input filename has a correct extension ( bam or cram)")
+            "Unsupported input file type. Make sure your input filename has a correct extension ( bam or cram)"
+        )
         sys.exit(1)
 
     reads_seen = 0
@@ -40,32 +45,41 @@ def convert_reads(args):
     larp = -1
     larp2 = -1
 
-    logging.info('Converting aligned reads ... This might take a while ...')
+    logging.info("Converting aligned reads ... This might take a while ...")
 
     for index, chr in enumerate(reads_file.references):
 
         chr_name = chr
-        if chr_name[:3].lower() == 'chr':
+        if chr_name[:3].lower() == "chr":
             chr_name = chr_name[3:]
-        if chr_name not in bins_per_chr and chr_name != 'X' and chr_name != 'Y':
+        if chr_name not in bins_per_chr and chr_name != "X" and chr_name != "Y":
             continue
 
-        logging.info('Working at {}; processing {} bins'
-                     .format(chr, int(reads_file.lengths[index] / float(args.binsize) + 1)))
-        counts = np.zeros(int(reads_file.lengths[index] / float(args.binsize) + 1), dtype=np.int32)
+        logging.info(
+            "Working at {}; processing {} bins".format(
+                chr, int(reads_file.lengths[index] / float(args.binsize) + 1)
+            )
+        )
+        counts = np.zeros(
+            int(reads_file.lengths[index] / float(args.binsize) + 1), dtype=np.int32
+        )
         bam_chr = reads_file.fetch(chr)
 
-        if chr_name == 'X':
-            chr_name = '23'
-        if chr_name == 'Y':
-            chr_name = '24'
+        if chr_name == "X":
+            chr_name = "23"
+        if chr_name == "Y":
+            chr_name = "24"
 
         for read in bam_chr:
             if read.is_paired:
                 if not read.is_proper_pair:
                     reads_pairf += 1
                     continue
-                if not args.normdup and larp == read.pos and larp2 == read.next_reference_start:
+                if (
+                    not args.normdup
+                    and larp == read.pos
+                    and larp2 == read.next_reference_start
+                ):
                     reads_rmdup += 1
                 else:
                     if read.mapping_quality >= 1:
@@ -93,12 +107,14 @@ def convert_reads(args):
         bins_per_chr[chr_name] = counts
         reads_kept += sum(counts)
 
-    qual_info = {'mapped': reads_file.mapped,
-                 'unmapped': reads_file.unmapped,
-                 'no_coordinate': reads_file.nocoordinate,
-                 'filter_rmdup': reads_rmdup,
-                 'filter_mapq': reads_mapq,
-                 'pre_retro': reads_seen,
-                 'post_retro': reads_kept,
-                 'pair_fail': reads_pairf}
+    qual_info = {
+        "mapped": reads_file.mapped,
+        "unmapped": reads_file.unmapped,
+        "no_coordinate": reads_file.nocoordinate,
+        "filter_rmdup": reads_rmdup,
+        "filter_mapq": reads_mapq,
+        "pre_retro": reads_seen,
+        "post_retro": reads_kept,
+        "pair_fail": reads_pairf,
+    }
     return bins_per_chr, qual_info