diff --git a/jcvi/utils/__main__.py b/jcvi/utils/__main__.py
index 1d149a16..f627666a 100644
--- a/jcvi/utils/__main__.py
+++ b/jcvi/utils/__main__.py
@@ -1,10 +1,10 @@
 #!/usr/bin/env python
 # -*- coding: UTF-8 -*-
 
 """
 Assortment of utility scripts implementing recipes from Python cookbooks, such as iterators, sorters, range queries, etc.
 """
-from jcvi.apps.base import dmain
+from ..apps.base import dmain
 
 
 if __name__ == "__main__":
diff --git a/jcvi/utils/aws.py b/jcvi/utils/aws.py
index 3d738616..ae3bdae8 100644
--- a/jcvi/utils/aws.py
+++ b/jcvi/utils/aws.py
@@ -4,31 +4,35 @@
 """
 AWS-related methods.
 """
+import fnmatch
+import getpass
+import json
 import os
 import os.path as op
 import sys
-import fnmatch
-import boto3
-import json
-import logging
 import time
-import getpass
 
 from configparser import NoOptionError, NoSectionError
 from datetime import datetime
 from multiprocessing import Pool
+
+import boto3
+
 from botocore.exceptions import ClientError, ParamValidationError
 
-from jcvi.formats.base import BaseFile, SetFile, timestamp
-from jcvi.utils.console import console
-from jcvi.apps.base import (
-    OptionParser,
+from ..apps.base import (
     ActionDispatcher,
+    OptionParser,
     datafile,
     get_config,
+    logger,
     popen,
     sh,
 )
+from ..formats.base import BaseFile, SetFile, timestamp
+
+from .console import console
+
 
 
 AWS_CREDS_PATH = "%s/.aws/credentials" % (op.expanduser("~"),)
@@ -165,7 +169,7 @@ def start(args):
     # Make sure the instance id is empty
     instance_id = s.instance_id
     if instance_id != "":
-        logging.error("Instance exists {}".format(instance_id))
+        logger.error("Instance exists {}".format(instance_id))
         sys.exit(1)
 
     launch_spec = s.launch_spec
@@ -208,14 +212,14 @@ def start(args):
         if "InstanceId" in response["SpotInstanceRequests"][0]:
             instance_id = response["SpotInstanceRequests"][0]["InstanceId"]
         else:
-            logging.debug("Waiting to be fulfilled ...")
+            logger.debug("Waiting to be fulfilled ...")
             time.sleep(10)
 
     # Check if the instance is running
     print("Instance id {}".format(instance_id), file=sys.stderr)
     status = ""
     while status != "running":
-        logging.debug("Waiting instance to run ...")
+        logger.debug("Waiting for instance to run ...")
         time.sleep(3)
         response = client.describe_instance_status(InstanceIds=[instance_id])
         if len(response["InstanceStatuses"]) > 0:
@@ -271,7 +275,7 @@ def stop(args):
     # Make sure the instance id is NOT empty
     instance_id = s.instance_id
     if instance_id == "":
-        logging.error("Cannot find instance_id {}".format(instance_id))
+        logger.error("Cannot find instance_id {}".format(instance_id))
         sys.exit(1)
 
     block_device_mappings = []
@@ -289,7 +293,7 @@ def stop(args):
 
     image_status = ""
     while image_status != "available":
-        logging.debug("Waiting for image to be ready")
+        logger.debug("Waiting for image to be ready")
         time.sleep(10)
         response = client.describe_images(ImageIds=[new_image_id])
         image_status = response["Images"][0]["State"]
@@ -488,7 +492,7 @@ def check_exists_s3(s3_store_obj_name: str, warn=False) -> bool:
     counts = int(popen(cmd).read())
     exists = counts != 0
     if exists and warn:
-        logging.debug("{} exists. Skipped.".format(s3_store_obj_name))
+        logger.debug("{} exists. Skipped.".format(s3_store_obj_name))
     return exists
 
 
@@ -577,7 +581,7 @@ def validate(args, config):
         )
     else:
         role_msg = ""
-    logging.info("Validating credentials for profile: %s %s" % (profile, role_msg))
+    logger.info("Validating credentials for profile: %s %s" % (profile, role_msg))
     reup_message = "Obtaining credentials for a new role or profile."
 
     try:
@@ -642,7 +646,7 @@ def validate(args, config):
         for option in required_options:
             short_term[option] = config.get(profile, option)
     except NoOptionError:
-        logging.warning(
+        logger.warning(
             "Your existing credentials are missing or invalid, "
             "obtaining new credentials."
         )
@@ -654,12 +658,12 @@ def validate(args, config):
         current_role = None
 
     if args.force:
-        logging.info("Forcing refresh of credentials.")
+        logger.info("Forcing refresh of credentials.")
         force_refresh = True
     # There are not credentials for an assumed role,
     # but the user is trying to assume one
     elif current_role is None and args.assume_role:
-        logging.info(reup_message)
+        logger.info(reup_message)
         force_refresh = True
     # There are current credentials for a role and
     # the role arn being provided is the same.
@@ -676,12 +680,12 @@ def validate(args, config):
         and args.assume_role
         and current_role != args.assume_role
     ):
-        logging.info(reup_message)
+        logger.info(reup_message)
        force_refresh = True
     # There are credentials for a current role and no role arn is
     # being supplied
     elif current_role is not None and args.assume_role is None:
-        logging.info(reup_message)
+        logger.info(reup_message)
         force_refresh = True
 
     should_refresh = True
@@ -691,10 +695,10 @@ def validate(args, config):
         exp = datetime.strptime(config.get(profile, "expiration"), "%Y-%m-%d %H:%M:%S")
         diff = exp - datetime.utcnow()
         if diff.total_seconds() <= 0:
-            logging.info("Your credentials have expired, renewing.")
+            logger.info("Your credentials have expired, renewing.")
         else:
             should_refresh = False
-            logging.info(
+            logger.info(
                 "Your credentials are still valid for %s seconds"
                 " they will expire at %s" % (diff.total_seconds(), exp)
             )
@@ -714,7 +718,7 @@
 def get_credentials(profile, args, config):
 
     if args.assume_role:
-        logging.info(
+        logger.info(
             "Assuming Role - Profile: %s, Role: %s, Duration: %s",
             profile,
             args.assume_role,
@@ -747,7 +751,7 @@ def get_credentials(profile, args, config):
             args.assume_role,
         )
     else:
-        logging.info(
+        logger.info(
             "Fetching Credentials - Profile: %s, Duration: %s", profile, args.duration
         )
         try:
@@ -790,7 +794,7 @@ def get_credentials(profile, args, config):
     with open(AWS_CREDS_PATH, "w") as configfile:
         config.write(configfile)
 
-    logging.info(
+    logger.info(
         "Success! Your credentials will expire in %s seconds at: %s"
         % (args.duration, response["Credentials"]["Expiration"])
     )
 
 
@@ -798,7 +802,7 @@
 
 def log_error_and_exit(message):
     """Log an error message and exit with error"""
-    logging.error(message)
+    logger.error(message)
     sys.exit(1)
 
 
diff --git a/jcvi/utils/cbook.py b/jcvi/utils/cbook.py
index 32ef8b6e..a2712ffb 100644
--- a/jcvi/utils/cbook.py
+++ b/jcvi/utils/cbook.py
@@ -6,10 +6,11 @@
 import os.path as op
 import re
 import sys
-import logging
 
 from collections import defaultdict
 
+from ..apps.base import logger
+
 
 def inspect(item, maxchar=80):
     """
@@ -37,7 +38,7 @@ def timed(*args, **kw):
         te = time.time()
 
         msg = "{0}{1} {2:.2f}s".format(func.__name__, args, te - ts)
-        logging.debug(msg)
+        logger.debug(msg)
 
         return result
 
@@ -149,7 +150,7 @@ def tofile(self, filename):
         for x in self.data:
             print(x, file=fw)
         fw.close()
-        logging.debug(
+        logger.debug(
             "Array of size {0} written to file `{1}`.".format(self.size, filename)
         )
 
diff --git a/jcvi/utils/db.py b/jcvi/utils/db.py
index 043abb49..6e2e4b94 100644
--- a/jcvi/utils/db.py
+++ b/jcvi/utils/db.py
@@ -5,13 +5,12 @@
 Connect to databases (Sybase, MySQL and PostgreSQL database backends)
 """
 import os.path as op
-import sys
-import logging
 import re
+import sys
 
-from jcvi.formats.base import must_open
-from jcvi.apps.base import OptionParser, ActionDispatcher, sh, getusername
-from jcvi.utils.cbook import AutoVivification
+from ..apps.base import ActionDispatcher, OptionParser, getusername, logger, sh
+from ..formats.base import must_open
+from ..utils.cbook import AutoVivification
 
 
 # set up valid database connection params
@@ -299,10 +298,8 @@ def query(args):
     else:
         for qry in qrys:
             if re.search(r"{\d+}", qry):
-                logging.error(
-                    "Query `{0}` contains placeholders, no datafile(s) specified".format(
-                        qry
-                    )
+                logger.error(
+                    "Query `%s` contains placeholders, no datafile(s) specified", qry
                 )
                 sys.exit()
             queries.add(qry)
diff --git a/jcvi/utils/range.py b/jcvi/utils/range.py
index 837d2e8c..d12043e2 100644
--- a/jcvi/utils/range.py
+++ b/jcvi/utils/range.py
@@ -11,6 +11,7 @@
 
 from collections import namedtuple, defaultdict
 from itertools import groupby
+
 from more_itertools import pairwise
 
 
diff --git a/jcvi/utils/taxonomy.py b/jcvi/utils/taxonomy.py
index 50435dce..dd79fec4 100644
--- a/jcvi/utils/taxonomy.py
+++ b/jcvi/utils/taxonomy.py
@@ -28,7 +28,6 @@
 """
 import sys
 import time
-import logging
 
 from functools import lru_cache
 
@@ -40,7 +39,7 @@
 from ClientForm import ParseResponse
 from BeautifulSoup import BeautifulSoup
 
-from jcvi.apps.base import OptionParser, ActionDispatcher
+from ..apps.base import ActionDispatcher, OptionParser, logger
 
 URL = "http://itol.embl.de/other_trees.shtml"
 
@@ -65,8 +64,8 @@ def __init__(self, list_of_taxids):
                 response = urlopen(URL)
                 success = True
             except (URLError, HTTPError, RuntimeError) as e:
-                logging.error(e)
-                logging.debug("wait 5 seconds to reconnect...")
+                logger.error(e)
+                logger.debug("wait 5 seconds to reconnect...")
                 time.sleep(5)
 
         forms = ParseResponse(response, backwards_compat=False)
diff --git a/jcvi/utils/webcolors.py b/jcvi/utils/webcolors.py
index 7cbcdd46..36acee88 100755
--- a/jcvi/utils/webcolors.py
+++ b/jcvi/utils/webcolors.py
@@ -8,11 +8,13 @@
 # Copyright © 2021 Haibao Tang. All rights reserved.
 #
 
 import logging
 
 import numpy as np
-from webcolors import CSS3_HEX_TO_NAMES, hex_to_rgb
 from skimage.color import rgb2lab, deltaE_cmc
+from webcolors import CSS3_HEX_TO_NAMES, hex_to_rgb
+
+from ..apps.base import logger
 
 
 def color_diff(rgb1, rgb2):
diff --git a/jcvi/variation/__main__.py b/jcvi/variation/__main__.py
index d23340c5..175c1d19 100644
--- a/jcvi/variation/__main__.py
+++ b/jcvi/variation/__main__.py
@@ -4,7 +4,7 @@
 Set of scripts relating to variation studies such as imputation, phasing,
 SNP/CNV analysis, and other supporting routines
 """
-from jcvi.apps.base import dmain
+from ..apps.base import dmain
 
 
 if __name__ == "__main__":
diff --git a/jcvi/variation/cnv.py b/jcvi/variation/cnv.py
index 00e0056d..50737f35 100644
--- a/jcvi/variation/cnv.py
+++ b/jcvi/variation/cnv.py
@@ -4,31 +4,40 @@
 """
 Helper functions for Copy Number Variations (CNV).
 """
-import sys
 import logging
 import os.path as op
-
-import numpy as np
-import numpy.ma as ma
-import pandas as pd
-import pysam
+import sys
 
 from collections import Counter, defaultdict
 from dataclasses import dataclass
 from itertools import groupby
 from multiprocessing import Pool
 from random import choice
+
+import numpy as np
+import numpy.ma as ma
+import pandas as pd
+import pysam
+
 from pybedtools import BedTool, cleanup, set_tempdir
 
-from jcvi.algorithms.formula import get_kmeans
-from jcvi.apps.grid import MakeManager
-from jcvi.utils.aws import glob_s3, push_to_s3, sync_from_s3
-from jcvi.utils.cbook import percentage
-from jcvi.apps.base import OptionParser, ActionDispatcher, getfilesize, mkdir, popen, sh
+from ..algorithms.formula import get_kmeans
+from ..apps.base import (
+    ActionDispatcher,
+    OptionParser,
+    getfilesize,
+    logger,
+    mkdir,
+    popen,
+    sh,
+)
+from ..apps.grid import MakeManager
+from ..utils.aws import glob_s3, push_to_s3, sync_from_s3
+from ..utils.cbook import percentage
 
 logging.getLogger("matplotlib").setLevel(logging.WARNING)
 
-autosomes = ["chr{}".format(x) for x in range(1, 23)]
+autosomes = [f"chr{x}" for x in range(1, 23)]
 sexsomes = ["chrX", "chrY"]
 allsomes = autosomes + sexsomes
 # See: http://www.ncbi.nlm.nih.gov/projects/genome/assembly/grc/human/
@@ -591,7 +600,7 @@ def gcn(args):
 
 def vcf_to_df_worker(arg):
     """Convert CANVAS vcf to a dict, single thread"""
     canvasvcf, exonbed, i = arg
-    logging.debug("Working on job {}: {}".format(i, canvasvcf))
+    logger.debug("Working on job {}: {}".format(i, canvasvcf))
     samplekey = op.basename(canvasvcf).split(".")[0].rsplit("_", 1)[0]
     d = {"SampleKey": samplekey}
@@ -786,7 +795,7 @@ def bam_to_cib(arg):
     bamfile, seq, samplekey = arg
     bam = pysam.AlignmentFile(bamfile, "rb")
     name, length = seq["SN"], seq["LN"]
-    logging.debug("Computing depth for {} (length={})".format(name, length))
+    logger.debug("Computing depth for {} (length={})".format(name, length))
     pileup = bam.pileup(name)
     a = np.ones(length, dtype=np.int8) * -128
     for x in pileup:
@@ -794,7 +803,7 @@ def bam_to_cib(arg):
 
     cibfile = op.join(samplekey, "{}.{}.cib".format(samplekey, name))
     a.tofile(cibfile)
-    logging.debug("Depth written to `{}`".format(cibfile))
+    logger.debug("Depth written to `{}`".format(cibfile))
 
 
 def cib(args):
@@ -823,7 +832,7 @@ def cib(args):
     for r in refs:
         task_args.append((bamfile, r, samplekey))
     cpus = min(opts.cpus, len(task_args))
-    logging.debug("Use {} cpus".format(cpus))
+    logger.debug("Use {} cpus".format(cpus))
 
     p = Pool(processes=cpus)
     for _ in p.imap(bam_to_cib, task_args):
@@ -865,7 +874,7 @@ def batchcn(args):
             continue
         print(" ".join((cmd, samplekey, path)))
 
-    logging.debug("Skipped: {}".format(percentage(nskipped, ntotal)))
+    logger.debug("Skipped: {}".format(percentage(nskipped, ntotal)))
 
 
 def hmm(args):
@@ -911,7 +920,7 @@ def hmm(args):
             print(" ".join((event.bedline, sample_key)), file=fw)
             nevents += 1
     fw.close()
-    logging.debug(
+    logger.debug(
         "A total of {} aberrant events written to `{}`".format(nevents, hmmfile)
     )
     return hmmfile
@@ -936,7 +945,7 @@ def batchccn(args):
 
     header = next(open(csvfile))
     header = None if header.strip().endswith(".bam") else "infer"
-    logging.debug("Header={}".format(header))
+    logger.debug("Header={}".format(header))
     df = pd.read_csv(csvfile, header=header)
     cmd = "perl /mnt/software/ccn_gcn_hg38_script/ccn_gcn_hg38.pl"
     cmd += " -n {} -b {}"
@@ -981,9 +990,7 @@ def mergecn(args):
             idx = get_kmeans(chr_med, k=2)
             zero_med = np.median(chr_med[idx == 0])
             one_med = np.median(chr_med[idx == 1])
-            logging.debug(
-                "K-means with {} c0:{} c1:{}".format(seqid, zero_med, one_med)
-            )
+            logger.debug("K-means with {} c0:{} c1:{}".format(seqid, zero_med, one_med))
             higher_idx = 1 if one_med > zero_med else 0
             # Use the higher mean coverage componen
             arrays = np.array(arrays)[idx == higher_idx]
@@ -1003,7 +1010,7 @@ def mergecn(args):
         stdfile = op.join(betadir, "{}.std".format(seqid))
         std = np.array(std)
         std.tofile(stdfile)
-        logging.debug("Written to `{}`".format(betafile))
+        logger.debug("Written to `{}`".format(betafile))
         ar.tofile("{}.bin".format(seqid))
 
@@ -1042,7 +1049,7 @@ def build_gc_array(fastafile="/mnt/ref/hg38.upper.fa", gcdir="gc", n=1000):
     mkdir(gcdir)
     for seqid in allsomes:
         if seqid not in f:
-            logging.debug("Seq {} not found. Continue anyway.".format(seqid))
+            logger.debug("Seq {} not found. Continue anyway.".format(seqid))
             continue
         c = np.array(f[seqid])
         gc = (c == "G") | (c == "C")  # If base is GC
@@ -1108,7 +1115,7 @@ def cn(args):
 
     cndir = op.join(workdir, sample_key + "-cn")
     if op.exists(cndir):
-        logging.debug("Directory {} exists. Skipped.".format(cndir))
+        logger.debug("Directory {} exists. Skipped.".format(cndir))
         return
 
     gcdir = "gc"
@@ -1125,7 +1132,7 @@ def cn(args):
     for seqid in allsomes:
         gcfile = op.join(gcdir, "{}.{}.gc".format(seqid, n))
         if not op.exists(gcfile):
-            logging.error("File {} not found. Continue anyway.".format(gcfile))
+            logger.error("File {} not found. Continue anyway.".format(gcfile))
             continue
         gc = np.fromfile(gcfile, dtype=np.uint8)
         cibfile = op.join(sampledir, "{}.{}.cib".format(sample_key, seqid))
@@ -1279,7 +1286,7 @@ def validate(args):
     cc = (rdr + baf + comp + vaf).cols(1)
     htmlfile = f"{sample}.html"
     hv.save(cc, htmlfile)
-    logging.info("Report written to `%s`", htmlfile)
+    logger.info("Report written to `%s`", htmlfile)
 
 
 def get_segments(rfx: pd.DataFrame):
@@ -1390,7 +1397,7 @@ def get_CNV_records(vcffile: str) -> list[CNV]:
             (cn,) = record.format("CN")[0]
         record = CNV(chr, start, end, type, name, is_pass, cn)
         records.append(record)
-    logging.info("A total of %d records imported", len(records))
+    logger.info("A total of %d records imported", len(records))
 
     return records
@@ -1497,7 +1504,7 @@ def wes_vs_wgs(args):
     )
     htmlfile = f"{sample}.html"
     hv.save(cc, htmlfile)
-    logging.info("Report written to `%s`", htmlfile)
+    logger.info("Report written to `%s`", htmlfile)
 
 
 if __name__ == "__main__":
diff --git a/jcvi/variation/deconvolute.py b/jcvi/variation/deconvolute.py
index df8f7409..c525c8a9 100644
--- a/jcvi/variation/deconvolute.py
+++ b/jcvi/variation/deconvolute.py
@@ -6,7 +6,6 @@
 """
 import os.path as op
 import sys
-import logging
 
 from collections import namedtuple
 from itertools import product, groupby, islice
@@ -15,9 +14,9 @@
 from Bio.Data.IUPACData import ambiguous_dna_values
 from Bio.SeqIO.QualityIO import FastqGeneralIterator
 
-from jcvi.formats.base import FileMerger, must_open
-from jcvi.formats.fastq import FastqPairedIterator
-from jcvi.apps.base import OptionParser, ActionDispatcher, flatten, mkdir, glob
+from ..apps.base import ActionDispatcher, OptionParser, flatten, glob, logger, mkdir
+from ..formats.base import FileMerger, must_open
+from ..formats.fastq import FastqPairedIterator
 
 
 def main():
@@ -182,7 +181,7 @@ def split(args):
             barcodes.append(BarcodeLine._make((id, s)))
 
     nbc = len(barcodes)
-    logging.debug("Imported {0} barcodes (ambiguous codes expanded).".format(nbc))
+    logger.debug("Imported {0} barcodes (ambiguous codes expanded).".format(nbc))
 
     checkprefix = not opts.nocheckprefix
     if checkprefix:
@@ -196,7 +195,7 @@ def split(args):
                 assert bc.seq != s.seq
 
                 if s.seq.startswith(bc.seq) and len(s.seq) > len(bc.seq):
-                    logging.error("{0} shares same prefix as {1}.".format(s, bc))
+                    logger.error("{0} shares same prefix as {1}.".format(s, bc))
                     exclude.append(s)
             excludebarcodes.append(exclude)
     else:
@@ -206,7 +205,7 @@ def split(args):
         mkdir(outdir)
 
     cpus = opts.cpus
-    logging.debug("Create a pool of {0} workers.".format(cpus))
+    logger.debug("Create a pool of {0} workers.".format(cpus))
     pool = Pool(cpus)
 
     if paired:
@@ -219,7 +218,7 @@ def split(args):
         split_fun = split_barcode
         mode = "single"
 
-    logging.debug("Mode: {0}".format(mode))
+    logger.debug("Mode: {0}".format(mode))
 
     pool.map(
         split_fun, zip(barcodes, excludebarcodes, nbc * [outdir], nbc * [fastqfile])
diff --git a/jcvi/variation/delly.py b/jcvi/variation/delly.py
index 88c4ae15..71f3200d 100644
--- a/jcvi/variation/delly.py
+++ b/jcvi/variation/delly.py
@@ -7,13 +7,12 @@
 
 import os.path as op
 import sys
-import logging
 
-from jcvi.formats.base import BaseFile, read_until, must_open
-from jcvi.formats.sam import coverage
-from jcvi.utils.cbook import percentage
-from jcvi.utils.aws import ls_s3, push_to_s3
-from jcvi.apps.base import OptionParser, ActionDispatcher, sh, need_update
+from ..apps.base import ActionDispatcher, OptionParser, logger, need_update, sh
+from ..formats.base import BaseFile, read_until, must_open
+from ..formats.sam import coverage
+from ..utils.aws import ls_s3, push_to_s3
+from ..utils.cbook import percentage
 
 
 class DelLine(object):
@@ -62,7 +61,7 @@ def write_bed(self, bedfile="stdout"):
         fw = must_open(bedfile, "w")
         for d in self:
             print(d.bedline, file=fw)
-        logging.debug("File written to `%s`.", bedfile)
+        logger.debug("File written to `%s`.", bedfile)
 
 
 def main():
@@ -113,7 +112,7 @@ def mitosomatic(args):
             print("{}\t{}\t{:.6f}".format(row["chrom"], row["start"], af), file=fw)
 
     fw.close()
-    logging.debug("Allele freq written to `{}`".format(af_file))
+    logger.debug("Allele freq written to `{}`".format(af_file))
 
 
 def bed(args):
@@ -157,7 +156,7 @@ def mitocompile(args):
     print("\t".join("vcf samplekey depth seqid pos alt svlen pe sr".split()))
     for i, vcf in enumerate(vcfs):
         if (i + 1) % 100 == 0:
-            logging.debug("Process `{}` [{}]".format(vcf, percentage(i + 1, len(vcfs))))
+            logger.debug("Process `{}` [{}]".format(vcf, percentage(i + 1, len(vcfs))))
         depthfile = vcf.replace(".sv.vcf.gz", ".depth")
         fp = must_open(depthfile)
         _, depth = next(fp).split()
@@ -220,7 +219,7 @@ def mito(args):
     cleanup = not opts.nocleanup
 
     if not op.exists(chrMfa):
-        logging.debug("File `{}` missing. Exiting.".format(chrMfa))
+        logger.debug("File `{}` missing. Exiting.".format(chrMfa))
         return
 
     chrMfai = chrMfa + ".fai"
@@ -242,14 +241,14 @@ def mito(args):
             x for x in bamfiles if op.basename(x).split(".")[0] not in computed
         ]
 
-        logging.debug(
+        logger.debug(
             "Already computed on `{}`: {}".format(
                 store, len(bamfiles) - len(remaining_samples)
             )
         )
         bamfiles = remaining_samples
 
-    logging.debug("Total samples: {}".format(len(bamfiles)))
+    logger.debug("Total samples: {}".format(len(bamfiles)))
 
     for bamfile in bamfiles:
         run_mito(
@@ -273,7 +272,7 @@ def run_mito(
     if not op.exists(minibam):
         get_minibam(bamfile, region)
     else:
-        logging.debug("{} found. Skipped.".format(minibam))
+        logger.debug("{} found. Skipped.".format(minibam))
 
     speedseq_bin = op.join(opts.speedseq_home, "speedseq")
diff --git a/jcvi/variation/impute.py b/jcvi/variation/impute.py
index 552682c8..3807c206 100644
--- a/jcvi/variation/impute.py
+++ b/jcvi/variation/impute.py
@@ -5,14 +5,13 @@
 Impute unknown variations given an input vcf file.
 """
 import os.path as op
-import logging
 import sys
 
-from jcvi.utils.cbook import percentage
-from jcvi.apps.grid import MakeManager
-from jcvi.formats.base import must_open
-from jcvi.formats.vcf import VcfLine, CM
-from jcvi.apps.base import OptionParser, ActionDispatcher
+from ..apps.base import ActionDispatcher, OptionParser, logger
+from ..apps.grid import MakeManager
+from ..formats.base import must_open
+from ..formats.vcf import VcfLine, CM
+from ..utils.cbook import percentage
 
 
 def main():
@@ -82,7 +81,7 @@ def validate(args):
             v = VcfLine(row)
             register[(v.seqid, v.pos)] = v.genotype
 
-    logging.debug("Imported {0} records from `{1}`".format(len(register), withheld))
+    logger.debug("Imported %d records from `%s`", len(register), withheld)
 
     fp = must_open(imputed)
     hit = concordant = 0
@@ -109,7 +108,7 @@ def validate(args):
         else:
             print(row.strip(), "truth={0}".format(truth), file=sys.stderr)
 
-    logging.debug("Total concordant: {0}".format(percentage(concordant, hit)))
+    logger.debug("Total concordant: %s", percentage(concordant, hit))
 
 
 def minimac(args):
diff --git a/jcvi/variation/phase.py b/jcvi/variation/phase.py
index 8ce1a043..d1a6f70c 100644
--- a/jcvi/variation/phase.py
+++ b/jcvi/variation/phase.py
@@ -5,7 +5,6 @@
 Read-based phasing.
 """
 import sys
-import logging
 
 try:
     import vcf
@@ -13,7 +12,7 @@
     pass
 
 import pysam
-from jcvi.apps.base import OptionParser, ActionDispatcher
+from ..apps.base import ActionDispatcher, OptionParser, logger
 
 
 class CPRA:
@@ -100,10 +99,8 @@ def prepare(args):
             continue
         variants.append(v)
 
-    logging.debug(
-        "A total of {} bi-allelic SNVs imported from `{}`".format(
-            len(variants), vcffile
-        )
+    logger.debug(
+        "A total of %d bi-allelic SNVs imported from `%s`", len(variants), vcffile
     )
 
     bamfile = pysam.AlignmentFile(bamfile, "rb")
diff --git a/jcvi/variation/snp.py b/jcvi/variation/snp.py
index d316326e..f289a45d 100644
--- a/jcvi/variation/snp.py
+++ b/jcvi/variation/snp.py
@@ -5,12 +5,12 @@
 Analyze SNPs in re-sequencing panels.
 """
 import sys
-import logging
 
-from jcvi.formats.fasta import Fasta
-from jcvi.formats.base import is_number, write_file
-from jcvi.apps.grid import MakeManager
-from jcvi.apps.base import OptionParser, ActionDispatcher, sh, need_update
+from ..apps.base import ActionDispatcher, OptionParser, logger, need_update, sh
+from ..apps.grid import MakeManager
+from ..formats.base import is_number, write_file
+from ..formats.fasta import Fasta
+from ..utils.cbook import percentage
 
 
 def main():
@@ -362,10 +362,8 @@ def frommaf(args):
             )
             nsnps += 1
             if nsnps % 50000 == 0:
-                logging.debug("SNPs parsed: {0}".format(percentage(nsnps, total)))
-    logging.debug(
-        "A total of {0} SNPs validated and written to `{1}`.".format(nsnps, snpfile)
-    )
+                logger.debug("SNPs parsed: %s", percentage(nsnps, total))
+    logger.debug("A total of %d SNPs validated and written to `%s`.", nsnps, snpfile)
 
 
 if __name__ == "__main__":
diff --git a/jcvi/variation/str.py b/jcvi/variation/str.py
index 072e4b65..b35fc65e 100644
--- a/jcvi/variation/str.py
+++ b/jcvi/variation/str.py
@@ -10,34 +10,34 @@
 import json
 import sys
 
-try:
-    import vcf
-except ImportError:
-    pass
+from math import log, ceil
+from collections import Counter, defaultdict
+from multiprocessing import Pool
 
-import logging
-import pyfasta
 import numpy as np
 import pandas as pd
+import pyfasta
 
-from math import log, ceil
-from collections import Counter, defaultdict
-from multiprocessing import Pool
+try:
+    import vcf
+except ImportError:
+    pass
 
-from jcvi.utils.cbook import percentage, uniqify
-from jcvi.formats.base import timestamp
-from jcvi.formats.bed import natsorted
-from jcvi.apps.grid import MakeManager
-from jcvi.formats.base import LineFile, must_open
-from jcvi.utils.aws import push_to_s3, pull_from_s3, check_exists_s3, ls_s3
-from jcvi.apps.base import (
-    OptionParser,
+from ..apps.base import (
     ActionDispatcher,
+    OptionParser,
+    datafile,
+    logger,
     mkdir,
     need_update,
-    datafile,
     sh,
 )
+from ..apps.grid import MakeManager
+from ..formats.base import LineFile, must_open
+from ..formats.base import timestamp
+from ..formats.bed import natsorted
+from ..utils.aws import check_exists_s3, ls_s3, pull_from_s3, push_to_s3
+from ..utils.cbook import percentage, uniqify
 
 
 REF = "hg38"
@@ -291,7 +291,7 @@ def __init__(self, columnidsfile="STR.ids"):
         if columnidsfile:
             fp = open(columnidsfile)
             self.columns = [x.strip() for x in fp]
-            logging.debug(
+            logger.debug(
                 "A total of {} markers imported from `{}`".format(
                     len(self.columns), columnidsfile
                 )
@@ -298,8 +298,8 @@ def __init__(self, columnidsfile="STR.ids"):
             )
 
     def parse(self, filename, filtered=True, cleanup=False):
         self.samplekey = op.basename(filename).split(".")[0]
-        logging.debug("Parse `{}` (filtered={})".format(filename, filtered))
+        logger.debug("Parse `{}` (filtered={})".format(filename, filtered))
         fp = must_open(filename)
         reader = vcf.Reader(fp)
         for record in reader:
@@ -426,17 +426,17 @@ def treds(args):
 
     metafile = "TREDs_{}_SEARCH.meta.tsv".format(timestamp())
     tf.to_csv(metafile, sep="\t", index=False)
-    logging.debug("File `{}` written.".format(metafile))
+    logger.debug("File `{}` written.".format(metafile))
     if opts.csv:
         metacsvfile = metafile.rsplit(".", 1)[0] + ".csv"
         tf.to_csv(metacsvfile, index=False)
-        logging.debug("File `{}` written.".format(metacsvfile))
+        logger.debug("File `{}` written.".format(metacsvfile))
 
     pp = df[tags]
     pp.columns = final_columns
     datafile = "TREDs_{}_SEARCH.data.tsv".format(timestamp())
     pp.to_csv(datafile, sep="\t", index=False)
-    logging.debug("File `{}` written.".format(datafile))
+    logger.debug("File `{}` written.".format(datafile))
 
     mask([datafile, metafile])
 
@@ -513,12 +513,12 @@ def run_filter(arg):
         if vcffile.startswith("s3://"):
             if not check_exists_s3(filteredvcf, warn=True):
                 write_filtered(vcffile, lhome, store=store)
-                logging.debug("{} written and uploaded.".format(filteredvcf))
+                logger.debug("{} written and uploaded.".format(filteredvcf))
         else:
             if need_update(vcffile, filteredvcf):
                 write_filtered(vcffile, lhome, store=None)
     except Exception as e:
-        logging.debug("Thread failed! Error: {}".format(e))
+        logger.debug("Thread failed! Error: {}".format(e))
 
 
 def filtervcf(args):
@@ -576,7 +576,7 @@ def write_meta(af_file, gene_map, blacklist, filename="meta.tsv"):
             file=fw,
         )
     fw.close()
-    logging.debug("Write meta file to `{}`".format(filename))
+    logger.debug("Write meta file to `{}`".format(filename))
 
 
 def read_treds(tredsfile=datafile("TREDs.meta.csv")):
@@ -587,7 +587,7 @@ def read_treds(tredsfile=datafile("TREDs.meta.csv")):
         df = pd.read_csv(tredsfile, sep="\t")
 
     treds = set(df["abbreviation"])
-    logging.debug("Loaded {} treds from `{}`".format(len(treds), tredsfile))
+    logger.debug("Loaded {} treds from `{}`".format(len(treds), tredsfile))
 
     return treds, df
 
@@ -644,7 +644,7 @@ def meta(args):
             print("\t".join((locus, af, remove)), file=fw)
     fw.close()
 
-    logging.debug("Load gene intersections from `{}`".format(wobed))
+    logger.debug("Load gene intersections from `{}`".format(wobed))
     fp = open(wobed)
     gene_map = defaultdict(set)
     for row in fp:
@@ -659,7 +659,7 @@ def meta(args):
 
     metafile = "STRs_{}_SEARCH.meta.tsv".format(timestamp())
     write_meta(af_file, gene_map, TREDS, filename=metafile)
-    logging.debug("File `{}` written.".format(metafile))
+    logger.debug("File `{}` written.".format(metafile))
 
 
 def alleles_to_counts(a):
@@ -818,7 +818,7 @@ def data(args):
     fw = open(filteredstrids, "w")
     print("\n".join(final_columns), file=fw)
     fw.close()
-    logging.debug(
+    logger.debug(
         "Dropped {} columns; Retained {} columns (`{}`)".format(
             len(remove), len(final_columns), filteredstrids
         )
@@ -832,7 +832,7 @@ def data(args):
     if need_update(databin, filtered_bin):
         m = df.as_matrix()
         m.tofile(filtered_bin)
-        logging.debug("Filtered binary matrix written to `{}`".format(filtered_bin))
+        logger.debug("Filtered binary matrix written to `{}`".format(filtered_bin))
 
     # Write data output
     filtered_tsv = "{}.data.tsv".format(pf)
@@ -883,7 +883,7 @@ def mask(args):
     if mode == "TREDs" or need_update(databin, maskfile):
         cpus = min(8, len(run_args))
         write_mask(cpus, samples, final_columns, run_args, filename=maskfile)
-        logging.debug("File `{}` written.".format(maskfile))
+        logger.debug("File `{}` written.".format(maskfile))
 
 
 def counts_filter(countsd, nalleles, seqid, cutoff=0.5):
@@ -975,12 +975,12 @@ def run_compile(arg):
         if filename.startswith("s3://"):
             if not check_exists_s3(csvfile, warn=True):
                 write_csv_ev(filename, filtered, cleanup, store=store)
-                logging.debug("{} written and uploaded.".format(csvfile))
+                logger.debug("{} written and uploaded.".format(csvfile))
         else:
             if need_update(filename, csvfile):
                 write_csv_ev(filename, filtered, cleanup, store=None)
     except Exception as e:
-        logging.debug("Thread failed! Error: {}".format(e))
+        logger.debug("Thread failed! Error: {}".format(e))
 
 
 def compilevcf(args):
@@ -1026,7 +1026,7 @@ def compilevcf(args):
         for db in dbs:
             ids.extend(STRFile(opts.lobstr_home, db=db).ids)
         uids = uniqify(ids)
-        logging.debug("Combined: {} Unique: {}".format(len(ids), len(uids)))
+        logger.debug("Combined: {} Unique: {}".format(len(ids), len(uids)))
 
         fw = open(stridsfile, "w")
         print("\n".join(uids), file=fw)
@@ -1312,7 +1312,7 @@ def batchlobstr(args):
             )
         )
     fp.close()
-    logging.debug("Total skipped: {0}".format(percentage(skipped, total)))
+    logger.debug("Total skipped: {0}".format(percentage(skipped, total)))
 
 
 def lobstr(args):
@@ -1374,18 +1374,18 @@ def lobstr(args):
         gzfile = pf + ".{0}.vcf.gz".format(lbindices[-1])
         remotegzfile = "{0}/{1}".format(store, gzfile)
         if check_exists_s3(remotegzfile):
-            logging.debug(
+            logger.debug(
                 "Object `{0}` exists. Computation skipped.".format(remotegzfile)
             )
             return
         localbamfile = pf + ".bam"
         localbaifile = localbamfile + ".bai"
         if op.exists(localbamfile):
-            logging.debug("BAM file already downloaded.")
+            logger.debug("BAM file already downloaded.")
         else:
             pull_from_s3(bamfile, localbamfile)
         if op.exists(localbaifile):
-            logging.debug("BAM index file already downloaded.")
+            logger.debug("BAM index file already downloaded.")
         else:
             remotebaifile = bamfile + ".bai"
             if check_exists_s3(remotebaifile):
@@ -1395,7 +1395,7 @@ def lobstr(args):
                 if check_exists_s3(remotebaifile):
                     pull_from_s3(remotebaifile, localbaifile)
                 else:
-                    logging.debug("BAM index cannot be found in S3!")
+                    logger.debug("BAM index cannot be found in S3!")
                     sh("samtools index {0}".format(localbamfile))
         bamfile = localbamfile
 
@@ -1544,7 +1544,7 @@ def lobstrindex(args):
                 print(r, file=newbed)
                 retained += 1
         newbed.close()
-        logging.debug("Retained: {0}".format(percentage(retained, total)))
+        logger.debug("Retained: {0}".format(percentage(retained, total)))
     else:
         newbedfile = trfbed
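
All of the rewritten call sites above rely on a single shared logger exported by jcvi.apps.base. A minimal sketch of that pattern follows; the handler setup and format string are illustrative assumptions, not the project's exact configuration.

# Minimal sketch of a shared, module-level logger (assumed setup; the real
# definition lives in jcvi/apps/base.py and may differ in its details).
import logging


def get_logger(name: str = "jcvi") -> logging.Logger:
    """Return a process-wide logger, configuring it only on first use."""
    log = logging.getLogger(name)
    if not log.handlers:  # guard against adding duplicate handlers on re-import
        handler = logging.StreamHandler()
        handler.setFormatter(
            logging.Formatter("%(asctime)s [%(levelname)s] %(message)s")
        )
        log.addHandler(handler)
        log.setLevel(logging.DEBUG)
    return log


logger = get_logger()

# Several call sites above also switch from "...".format(x) to lazy %-style
# arguments. The logging module interpolates these only if the record is
# actually emitted, so messages filtered out by level cost nothing to format:
logger.debug("A total of %d records imported", 42)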